Commit Graph

1066080 Commits

Author SHA1 Message Date
Vlastimil Babka bb192ed9aa mm/slub: Convert most struct page to struct slab by spatch
The majority of conversion from struct page to struct slab in SLUB
internals can be delegated to a coccinelle semantic patch. This includes
renaming of variables with 'page' in name to 'slab', and similar.

Big thanks to Julia Lawall and Luis Chamberlain for help with
coccinelle.

// Options: --include-headers --no-includes --smpl-spacing include/linux/slub_def.h mm/slub.c
// Note: needs coccinelle 1.1.1 to avoid breaking whitespace, and ocaml for the
// embedded script

// build list of functions to exclude from applying the next rule
@initialize:ocaml@
@@

let ok_function p =
  not (List.mem (List.hd p).current_element ["nearest_obj";"obj_to_index";"objs_per_slab_page";"__slab_lock";"__slab_unlock";"free_nonslab_page";"kmalloc_large_node"])

// convert the type from struct page to struct page in all functions except the
// list from previous rule
// this also affects struct kmem_cache_cpu, but that's ok
@@
position p : script:ocaml() { ok_function p };
@@

- struct page@p
+ struct slab

// in struct kmem_cache_cpu, change the name from page to slab
// the type was already converted by the previous rule
@@
@@

struct kmem_cache_cpu {
...
-struct slab *page;
+struct slab *slab;
...
}

// there are many places that use c->page which is now c->slab after the
// previous rule
@@
struct kmem_cache_cpu *c;
@@

-c->page
+c->slab

@@
@@

struct kmem_cache {
...
- unsigned int cpu_partial_pages;
+ unsigned int cpu_partial_slabs;
...
}

@@
struct kmem_cache *s;
@@

- s->cpu_partial_pages
+ s->cpu_partial_slabs

@@
@@

static void
- setup_page_debug(
+ setup_slab_debug(
 ...)
 {...}

@@
@@

- setup_page_debug(
+ setup_slab_debug(
 ...);

// for all functions (with exceptions), change any "struct slab *page"
// parameter to "struct slab *slab" in the signature, and generally all
// occurences of "page" to "slab" in the body - with some special cases.

@@
identifier fn !~ "free_nonslab_page|obj_to_index|objs_per_slab_page|nearest_obj";
@@
 fn(...,
-   struct slab *page
+   struct slab *slab
    ,...)
 {
<...
- page
+ slab
...>
 }

// similar to previous but the param is called partial_page
@@
identifier fn;
@@

 fn(...,
-   struct slab *partial_page
+   struct slab *partial_slab
    ,...)
 {
<...
- partial_page
+ partial_slab
...>
 }

// similar to previous but for functions that take pointer to struct page ptr
@@
identifier fn;
@@

 fn(...,
-   struct slab **ret_page
+   struct slab **ret_slab
    ,...)
 {
<...
- ret_page
+ ret_slab
...>
 }

// functions converted by previous rules that were temporarily called using
// slab_page(E) so we want to remove the wrapper now that they accept struct
// slab ptr directly
@@
identifier fn =~ "slab_free|do_slab_free";
expression E;
@@

 fn(...,
- slab_page(E)
+ E
  ,...)

// similar to previous but for another pattern
@@
identifier fn =~ "slab_pad_check|check_object";
@@

 fn(...,
- folio_page(folio, 0)
+ slab
  ,...)

// functions that were returning struct page ptr and now will return struct
// slab ptr, including slab_page() wrapper removal
@@
identifier fn =~ "allocate_slab|new_slab";
expression E;
@@

 static
-struct slab *
+struct slab *
 fn(...)
 {
<...
- slab_page(E)
+ E
...>
 }

// rename any former struct page * declarations
@@
@@

struct slab *
(
- page
+ slab
|
- partial_page
+ partial_slab
|
- oldpage
+ oldslab
)
;

// this has to be separate from previous rule as page and page2 appear at the
// same line
@@
@@

struct slab *
-page2
+slab2
;

// similar but with initial assignment
@@
expression E;
@@

struct slab *
(
- page
+ slab
|
- flush_page
+ flush_slab
|
- discard_page
+ slab_to_discard
|
- page_to_unfreeze
+ slab_to_unfreeze
)
= E;

// convert most of struct page to struct slab usage inside functions (with
// exceptions), including specific variable renames
@@
identifier fn !~ "nearest_obj|obj_to_index|objs_per_slab_page|__slab_(un)*lock|__free_slab|free_nonslab_page|kmalloc_large_node";
expression E;
@@

 fn(...)
 {
<...
(
- int pages;
+ int slabs;
|
- int pages = E;
+ int slabs = E;
|
- page
+ slab
|
- flush_page
+ flush_slab
|
- partial_page
+ partial_slab
|
- oldpage->pages
+ oldslab->slabs
|
- oldpage
+ oldslab
|
- unsigned int nr_pages;
+ unsigned int nr_slabs;
|
- nr_pages
+ nr_slabs
|
- unsigned int partial_pages = E;
+ unsigned int partial_slabs = E;
|
- partial_pages
+ partial_slabs
)
...>
 }

// this has to be split out from the previous rule so that lines containing
// multiple matching changes will be fully converted
@@
identifier fn !~ "nearest_obj|obj_to_index|objs_per_slab_page|__slab_(un)*lock|__free_slab|free_nonslab_page|kmalloc_large_node";
@@

 fn(...)
 {
<...
(
- slab->pages
+ slab->slabs
|
- pages
+ slabs
|
- page2
+ slab2
|
- discard_page
+ slab_to_discard
|
- page_to_unfreeze
+ slab_to_unfreeze
)
...>
 }

// after we simply changed all occurences of page to slab, some usages need
// adjustment for slab-specific functions, or use slab_page() wrapper
@@
identifier fn !~ "nearest_obj|obj_to_index|objs_per_slab_page|__slab_(un)*lock|__free_slab|free_nonslab_page|kmalloc_large_node";
@@

 fn(...)
 {
<...
(
- page_slab(slab)
+ slab
|
- kasan_poison_slab(slab)
+ kasan_poison_slab(slab_page(slab))
|
- page_address(slab)
+ slab_address(slab)
|
- page_size(slab)
+ slab_size(slab)
|
- PageSlab(slab)
+ folio_test_slab(slab_folio(slab))
|
- page_to_nid(slab)
+ slab_nid(slab)
|
- compound_order(slab)
+ slab_order(slab)
)
...>
 }

Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
Reviewed-by: Roman Gushchin <guro@fb.com>
Reviewed-by: Hyeonggon Yoo <42.hyeyoo@gmail.com>
Tested-by: Hyeonggon Yoo <42.hyeyoo@gmail.com>
Cc: Julia Lawall <julia.lawall@inria.fr>
Cc: Luis Chamberlain <mcgrof@kernel.org>
2022-01-06 12:26:02 +01:00
Matthew Wilcox (Oracle) 01b34d1631 mm/slub: Convert pfmemalloc_match() to take a struct slab
Preparatory for mass conversion. Use the new slab_test_pfmemalloc()
helper.  As it doesn't do VM_BUG_ON(!PageSlab()) we no longer need the
pfmemalloc_match_unsafe() variant.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
Reviewed-by: Roman Gushchin <guro@fb.com>
Reviewed-by: Hyeonggon Yoo <42.hyeyoo@gmail.com>
2022-01-06 12:26:02 +01:00
Vlastimil Babka 4020b4a226 mm/slub: Convert __free_slab() to use struct slab
__free_slab() is on the boundary of distinguishing struct slab and
struct page so start with struct slab but convert to folio for working
with flags and folio_page() to call functions that require struct page.

Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
Reviewed-by: Roman Gushchin <guro@fb.com>
Reviewed-by: Hyeonggon Yoo <42.hyeyoo@gmail.com>
Tested-by: Hyeonggon Yoo <42.hyeyoo@gmail.com>
2022-01-06 12:26:01 +01:00
Vlastimil Babka 45387b8c14 mm/slub: Convert alloc_slab_page() to return a struct slab
Preparatory, callers convert back to struct page for now.

Also move setting page flags to alloc_slab_page() where we still operate
on a struct page. This means the page->slab_cache pointer is now set
later than the PageSlab flag, which could theoretically confuse some pfn
walker assuming PageSlab means there would be a valid cache pointer. But
as the code had no barriers and used __set_bit() anyway, it could have
happened already, so there shouldn't be such a walker.

Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
Reviewed-by: Roman Gushchin <guro@fb.com>
Reviewed-by: Hyeonggon Yoo <42.hyeyoo@gmail.com>
Tested-by: Hyeonggon Yoo <42.hyeyoo@gmail.com>
2022-01-06 12:26:01 +01:00
Matthew Wilcox (Oracle) fb012e278d mm/slub: Convert print_page_info() to print_slab_info()
Improve the type safety and prepare for further conversion. For flags
access, convert to folio internally.

[ vbabka@suse.cz: access flags via folio_flags() ]

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
Reviewed-by: Roman Gushchin <guro@fb.com>
Reviewed-by: Hyeonggon Yoo <42.hyeyoo@gmail.com>
2022-01-06 12:26:01 +01:00
Vlastimil Babka 0393895b09 mm/slub: Convert __slab_lock() and __slab_unlock() to struct slab
These functions operate on the PG_locked page flag, but make them accept
struct slab to encapsulate this implementation detail.

Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
Reviewed-by: Roman Gushchin <guro@fb.com>
2022-01-06 12:26:01 +01:00
Matthew Wilcox (Oracle) d835eef4fc mm/slub: Convert kfree() to use a struct slab
Convert kfree(), kmem_cache_free() and ___cache_free() to resolve object
addresses to struct slab, using folio as intermediate step where needed.
Keep passing the result as struct page for now in preparation for mass
conversion of internal functions.

[ vbabka@suse.cz: Use folio as intermediate step when checking for
  large kmalloc pages, and when freeing them - rename
  free_nonslab_page() to free_large_kmalloc() that takes struct folio ]

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
Reviewed-by: Roman Gushchin <guro@fb.com>
2022-01-06 12:25:57 +01:00
Matthew Wilcox (Oracle) cc465c3b23 mm/slub: Convert detached_freelist to use a struct slab
This gives us a little bit of extra typesafety as we know that nobody
called virt_to_page() instead of virt_to_head_page().

[ vbabka@suse.cz: Use folio as intermediate step when filtering out
  large kmalloc pages ]

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
Reviewed-by: Roman Gushchin <guro@fb.com>
2022-01-06 12:25:51 +01:00
Matthew Wilcox (Oracle) 0b3eb091d5 mm: Convert check_heap_object() to use struct slab
Ensure that we're not seeing a tail page inside __check_heap_object() by
converting to a slab instead of a page.  Take the opportunity to mark
the slab as const since we're not modifying it.  Also move the
declaration of __check_heap_object() to mm/slab.h so it's not available
to the wider kernel.

[ vbabka@suse.cz: in check_heap_object() only convert to struct slab for
  actual PageSlab pages; use folio as intermediate step instead of page ]

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
Reviewed-by: Roman Gushchin <guro@fb.com>
2022-01-06 12:25:51 +01:00
Matthew Wilcox (Oracle) 7213230af5 mm: Use struct slab in kmem_obj_info()
All three implementations of slab support kmem_obj_info() which reports
details of an object allocated from the slab allocator.  By using the
slab type instead of the page type, we make it obvious that this can
only be called for slabs.

[ vbabka@suse.cz: also convert the related kmem_valid_obj() to folios ]

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
Reviewed-by: Roman Gushchin <guro@fb.com>
2022-01-06 12:25:51 +01:00
Matthew Wilcox (Oracle) 0c24811b12 mm: Convert __ksize() to struct slab
In SLUB, use folios, and struct slab to access slab_cache field.
In SLOB, use folios to properly resolve pointers beyond
PAGE_SIZE offset of the object.

[ vbabka@suse.cz: use folios, and only convert folio_test_slab() == true
  folios to struct slab ]

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Reviewed-by: Roman Gushchin <guro@fb.com>
2022-01-06 12:25:51 +01:00
Matthew Wilcox (Oracle) 82c1775dc1 mm: Convert virt_to_cache() to use struct slab
This function is entirely self-contained, so can be converted from page
to slab.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Reviewed-by: Roman Gushchin <guro@fb.com>
Reviewed-by: Hyeonggon Yoo <42.hyeyoo@gmail.com>
2022-01-06 12:25:41 +01:00
Matthew Wilcox (Oracle) b918653b4f mm: Convert [un]account_slab_page() to struct slab
Convert the parameter of these functions to struct slab instead of
struct page and drop _page from the names. For now their callers just
convert page to slab.

[ vbabka@suse.cz: replace existing functions instead of calling them ]

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Reviewed-by: Roman Gushchin <guro@fb.com>
2022-01-06 12:25:40 +01:00
Matthew Wilcox (Oracle) d122019bf0 mm: Split slab into its own type
Make struct slab independent of struct page. It still uses the
underlying memory in struct page for storing slab-specific data, but
slab and slub can now be weaned off using struct page directly.  Some of
the wrapper functions (slab_address() and slab_order()) still need to
cast to struct folio, but this is a significant disentanglement.

[ vbabka@suse.cz: Rebase on folios, use folio instead of page where
  possible.

  Do not duplicate flags field in struct slab, instead make the related
  accessors go through slab_folio(). For testing pfmemalloc use the
  folio_*_active flag accessors directly so the PageSlabPfmemalloc
  wrappers can be removed later.

  Make folio_slab() expect only folio_test_slab() == true folios and
  virt_to_slab() return NULL when folio_test_slab() == false.

  Move struct slab to mm/slab.h.

  Don't represent with struct slab pages that are not true slab pages,
  but just a compound page obtained directly rom page allocator (with
  large kmalloc() for SLUB and SLOB). ]

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Reviewed-by: Roman Gushchin <guro@fb.com>
2022-01-06 12:25:40 +01:00
Vlastimil Babka ae16d059f8 mm/slub: Make object_err() static
There are no callers outside of mm/slub.c anymore.

Move freelist_corrupted() that calls object_err() to avoid a need for
forward declaration.

Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
Reviewed-by: Roman Gushchin <guro@fb.com>
2022-01-06 12:25:40 +01:00
Vlastimil Babka c798154311 mm/slab: Dissolve slab_map_pages() in its caller
The function no longer does what its name and comment suggests, and just
sets two struct page fields, which can be done directly in its sole
caller.

Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
Reviewed-by: Roman Gushchin <guro@fb.com>
Reviewed-by: Hyeonggon Yoo <42.hyeyoo@gmail.com>
2022-01-06 12:25:25 +01:00
Florian Westphal 23c54263ef netfilter: nft_set_pipapo: allocate pcpu scratch maps on clone
This is needed in case a new transaction is made that doesn't insert any
new elements into an already existing set.

Else, after second 'nft -f ruleset.txt', lookups in such a set will fail
because ->lookup() encounters raw_cpu_ptr(m->scratch) == NULL.

For the initial rule load, insertion of elements takes care of the
allocation, but for rule reloads this isn't guaranteed: we might not
have additions to the set.

Fixes: 3c4287f620 ("nf_tables: Add set type for arbitrary concatenation of ranges")
Reported-by: etkaar <lists.netfilter.org@prvy.eu>
Signed-off-by: Florian Westphal <fw@strlen.de>
Reviewed-by: Stefano Brivio <sbrivio@redhat.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
2022-01-06 10:43:24 +01:00
Pablo Neira Ayuso 4e1860a386 netfilter: nft_payload: do not update layer 4 checksum when mangling fragments
IP fragments do not come with the transport header, hence skip bogus
layer 4 checksum updates.

Fixes: 1814096980 ("netfilter: nft_payload: layer 4 checksum adjustment for pseudoheader fields")
Reported-and-tested-by: Steffen Weinreich <steve@weinreich.org>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
2022-01-06 10:43:23 +01:00
Hangbin Liu 1585f590a2 selftests: netfilter: switch to socat for tests using -q option
The nc cmd(nmap-ncat) that distributed with Fedora/Red Hat does not have
option -q. This make some tests failed with:

	nc: invalid option -- 'q'

Let's switch to socat which is far more dependable.

Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
Acked-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
2022-01-06 10:43:18 +01:00
Petr Mladek 73d86812a3 MAINTAIERS/printk: Add link to printk git
It might also help to avoid confusion with the historic
pmladek/printk.git that has got obsoleted by printk/linux.git
in February 2020.

Acked-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Petr Mladek <pmladek@suse.com>
Link: https://lore.kernel.org/r/20220105094157.26216-3-pmladek@suse.com
2022-01-06 10:25:57 +01:00
Petr Mladek c5b990c711 MAINTAINERS/vsprintf: Update link to printk git tree
printk git tree has moved to printk/linux.git in February 2020.

Acked-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Petr Mladek <pmladek@suse.com>
Link: https://lore.kernel.org/r/20220105094157.26216-2-pmladek@suse.com
2022-01-06 10:25:51 +01:00
Toke Høiland-Jørgensen 1372d34ccf xdp: Add xdp_do_redirect_frame() for pre-computed xdp_frames
Add an xdp_do_redirect_frame() variant which supports pre-computed
xdp_frame structures. This will be used in bpf_prog_run() to avoid having
to write to the xdp_frame structure when the XDP program doesn't modify the
frame boundaries.

Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20220103150812.87914-6-toke@redhat.com
2022-01-05 19:46:32 -08:00
Toke Høiland-Jørgensen d53ad5d8b2 xdp: Move conversion to xdp_frame out of map functions
All map redirect functions except XSK maps convert xdp_buff to xdp_frame
before enqueueing it. So move this conversion of out the map functions
and into xdp_do_redirect(). This removes a bit of duplicated code, but more
importantly it makes it possible to support caller-allocated xdp_frame
structures, which will be added in a subsequent commit.

Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20220103150812.87914-5-toke@redhat.com
2022-01-05 19:46:32 -08:00
Toke Høiland-Jørgensen 64693ec777 page_pool: Store the XDP mem id
Store the XDP mem ID inside the page_pool struct so it can be retrieved
later for use in bpf_prog_run().

Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Jesper Dangaard Brouer <brouer@redhat.com>
Link: https://lore.kernel.org/bpf/20220103150812.87914-4-toke@redhat.com
2022-01-05 19:46:32 -08:00
Toke Høiland-Jørgensen 35b2e54989 page_pool: Add callback to init pages when they are allocated
Add a new callback function to page_pool that, if set, will be called every
time a new page is allocated. This will be used from bpf_test_run() to
initialise the page data with the data provided by userspace when running
XDP programs with redirect turned on.

Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Acked-by: Jesper Dangaard Brouer <brouer@redhat.com>
Link: https://lore.kernel.org/bpf/20220103150812.87914-3-toke@redhat.com
2022-01-05 19:46:32 -08:00
Toke Høiland-Jørgensen 4a48ef70b9 xdp: Allow registering memory model without rxq reference
The functions that register an XDP memory model take a struct xdp_rxq as
parameter, but the RXQ is not actually used for anything other than pulling
out the struct xdp_mem_info that it embeds. So refactor the register
functions and export variants that just take a pointer to the xdp_mem_info.

This is in preparation for enabling XDP_REDIRECT in bpf_prog_run(), using a
page_pool instance that is not connected to any network device.

Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20220103150812.87914-2-toke@redhat.com
2022-01-05 19:46:32 -08:00
Alexei Starovoitov 640a171c93 Merge branch 'samples/bpf: xdpsock app enhancements'
Ong Boon says:

====================

First of all, sorry for taking more time to get back to this series and
thanks to all valuble feedback in series-1 at [1] from Jesper and Song
Liu.

Since then I have looked into what Jesper suggested in [2] and worked on
revising the patch series into several patches for ease of review:

v1->v2:
1/7: [No change]. Add VLAN tag (ID & Priority) to the generated Tx-Only
     frames.

2/7: [No change]. Add DMAC and SMAC setting to the generated Tx-Only
     frames. If parameters are not set, previous DMAC and SMAC are used.

3/7: [New]. Add support for selecting different CLOCK for clock_gettime()
     used in get_nsecs.

4/7: [New]. This is a total rework from series-1 3/4-patch [3]. It uses
     clock_nanosleep() suggested by Jesper. In addition, added statistic
     for Tx schedule variance under application stat (-a|--app-stats).
     Make the cyclic Tx operation and --poll mode to be mutually-
     exclusive. Still, the ability to specify TX cycle time and used
     together with batch size and packet count remain the same.

5/7: [New]. Add the support for TX process schedule policy and priority
     setting. By default, SCHED_OTHER policy is used. This too is matching
     the schedule policy setting in [2].

6/7: [Change]. This is update from series-1 4/4-patch [4]. Added TX clean
     process time-out in 1s granularity with configurable retries count
     (-O|--retries).

7/7: [New]. Added timestamp for TX packet following pktgen_hdr format
     matching the implementation in [2]. However, the sequence ID remains
     the same as it is instead of process schedule diff in [2].

To summarize on what program options have been added with v2 series
using an example below:-

 DMAC (-G)                 = fa:8d:f1:e2:0b:e8
 SMAC (-H)                 = ce:17:07:17:3e:3a

 VLAN tagged (-V)
 VLAN ID (-J)              = 12
 VLAN Pri (-K)             = 3

 Tx Queue (-q)             = 3
 Cycle Time in us (-T)     = 1000
 Batch (-b)                = 2
 Packet Count              = 6
 Tx schedule policy (-W)   = FIFO
 Tx schedule priority (-U) = 50
 Clock selection (-w)      = REALTIME

 Tx timeout retries(-O)    = 5
 Tx timestamp (-y)
 Cyclic Tx schedule stat (-a)

Note: xdpsock sets UDP dest-port and src-port to 0x1000 as default.

 Sending Board
 =============
 $ xdpsock -i eth0 -t -N -z -H ce:17:07:17:3e:3a -G fa:8d:f1:e2:0b:e8 \
   -V -J 12 -K 3 -q 3 \
   -T 1000 -b 2 -C 6 -W FIFO -U 50 -w REALTIME \
   -O 5 -y -a

  sock0@eth0:3 txonly xdp-drv
                    pps            pkts           0.00
 rx                 0              0
 tx                 0              6

                    calls/s        count
 rx empty polls     0              0
 fill fail polls    0              0
 copy tx sendtos    0              0
 tx wakeup sendtos  0              5
 opt polls          0              0

                    period     min        ave        max        cycle
 Cyclic TX          1000000    31033      32009      33397      3

 Receiving Board
 ===============
 $ tcpdump -nei eth0 udp port 0x1000 -vv -Q in -X \
    --time-stamp-precision nano
tcpdump: listening on eth0, link-type EN10MB (Ethernet), capture size 262144 bytes
03:46:40.520111580 ce:17:07:17:3e:3a > fa:8d:f1:e2:0b:e8, ethertype 802.1Q (0x8100), length 62: vlan 12, p 3, ethertype IPv4, (tos 0x0, ttl 64, id 0, offset 0, flags [none], proto UDP (17), length 44)
    10.10.10.16.4096 > 10.10.10.32.4096: [udp sum ok] UDP, length 16
        0x0000:  4500 002c 0000 0000 4011 527e 0a0a 0a10  E..,....@.R~....
        0x0010:  0a0a 0a20 1000 1000 0018 e997 be9b e955  ...............U
        0x0020:  0000 0000 61cd 2ba1 0006 987c            ....a.+....|
03:46:40.520112163 ce:17:07:17:3e:3a > fa:8d:f1:e2:0b:e8, ethertype 802.1Q (0x8100), length 62: vlan 12, p 3, ethertype IPv4, (tos 0x0, ttl 64, id 0, offset 0, flags [none], proto UDP (17), length 44)
    10.10.10.16.4096 > 10.10.10.32.4096: [udp sum ok] UDP, length 16
        0x0000:  4500 002c 0000 0000 4011 527e 0a0a 0a10  E..,....@.R~....
        0x0010:  0a0a 0a20 1000 1000 0018 e996 be9b e955  ...............U
        0x0020:  0000 0001 61cd 2ba1 0006 987c            ....a.+....|
03:46:40.521066860 ce:17:07:17:3e:3a > fa:8d:f1:e2:0b:e8, ethertype 802.1Q (0x8100), length 62: vlan 12, p 3, ethertype IPv4, (tos 0x0, ttl 64, id 0, offset 0, flags [none], proto UDP (17), length 44)
    10.10.10.16.4096 > 10.10.10.32.4096: [udp sum ok] UDP, length 16
        0x0000:  4500 002c 0000 0000 4011 527e 0a0a 0a10  E..,....@.R~....
        0x0010:  0a0a 0a20 1000 1000 0018 e5af be9b e955  ...............U
        0x0020:  0000 0002 61cd 2ba1 0006 9c62            ....a.+....b
03:46:40.521067012 ce:17:07:17:3e:3a > fa:8d:f1:e2:0b:e8, ethertype 802.1Q (0x8100), length 62: vlan 12, p 3, ethertype IPv4, (tos 0x0, ttl 64, id 0, offset 0, flags [none], proto UDP (17), length 44)
    10.10.10.16.4096 > 10.10.10.32.4096: [udp sum ok] UDP, length 16
        0x0000:  4500 002c 0000 0000 4011 527e 0a0a 0a10  E..,....@.R~....
        0x0010:  0a0a 0a20 1000 1000 0018 e5ae be9b e955  ...............U
        0x0020:  0000 0003 61cd 2ba1 0006 9c62            ....a.+....b
03:46:40.522061935 ce:17:07:17:3e:3a > fa:8d:f1:e2:0b:e8, ethertype 802.1Q (0x8100), length 62: vlan 12, p 3, ethertype IPv4, (tos 0x0, ttl 64, id 0, offset 0, flags [none], proto UDP (17), length 44)
    10.10.10.16.4096 > 10.10.10.32.4096: [udp sum ok] UDP, length 16
        0x0000:  4500 002c 0000 0000 4011 527e 0a0a 0a10  E..,....@.R~....
        0x0010:  0a0a 0a20 1000 1000 0018 e1c5 be9b e955  ...............U
        0x0020:  0000 0004 61cd 2ba1 0006 a04a            ....a.+....J
03:46:40.522062173 ce:17:07:17:3e:3a > fa:8d:f1:e2:0b:e8, ethertype 802.1Q (0x8100), length 62: vlan 12, p 3, ethertype IPv4, (tos 0x0, ttl 64, id 0, offset 0, flags [none], proto UDP (17), length 44)
    10.10.10.16.4096 > 10.10.10.32.4096: [udp sum ok] UDP, length 16
        0x0000:  4500 002c 0000 0000 4011 527e 0a0a 0a10  E..,....@.R~....
        0x0010:  0a0a 0a20 1000 1000 0018 e1c4 be9b e955  ...............U
        0x0020:  0000 0005 61cd 2ba1 0006 a04a            ....a.+....J

I have tested the above with both tagged and untagged packet format and
based on the timestamp in tcpdump found that the timing of the batch
cyclic transmission is correct.

Appreciate if community can give the patch series v2 a try and point out
any gap.

Thanks
Boon Leong

[1] https://patchwork.kernel.org/project/netdevbpf/cover/20211124091821.3916046-1-boon.leong.ong@intel.com/
[2] https://github.com/netoptimizer/network-testing/blob/master/src/udp_pacer.c
[3] https://patchwork.kernel.org/project/netdevbpf/patch/20211124091821.3916046-4-boon.leong.ong@intel.com/
[4] https://patchwork.kernel.org/project/netdevbpf/patch/20211124091821.3916046-5-boon.leong.ong@intel.com/
====================

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-01-05 17:53:25 -08:00
Ong Boon Leong eb68db45b7 samples/bpf: xdpsock: Add timestamp for Tx-only operation
It may be useful to add timestamp for Tx packets for continuous or cyclic
transmit operation. The timestamp and sequence ID of a Tx packet are
stored according to pktgen header format. To enable per-packet timestamp,
use -y|--tstamp option. If timestamp is off, pktgen header is not
included in the UDP payload. This means receiving side can use the magic
number for pktgen for differentiation.

The implementation supports both VLAN tagged and untagged option. By
default, the minimum packet size is set at 64B. However, if VLAN tagged
is on (-V), the minimum packet size is increased to 66B just so to fit
the pktgen_hdr size.

Added hex_dump() into the code path just for future cross-checking.
As before, simply change to "#define DEBUG_HEXDUMP 1" to inspect the
accuracy of TX packet.

Signed-off-by: Ong Boon Leong <boon.leong.ong@intel.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20211230035447.523177-8-boon.leong.ong@intel.com
2022-01-05 17:53:24 -08:00
Ong Boon Leong 8121e78932 samples/bpf: xdpsock: Add time-out for cleaning Tx
When user sets tx-pkt-count and in case where there are invalid Tx frame,
the complete_tx_only_all() process polls indefinitely. So, this patch
adds a time-out mechanism into the process so that the application
can terminate automatically after it retries 3*polling interval duration.

v1->v2:
 Thanks to Jesper's and Song Liu's suggestion.
 - clean-up git message to remove polling log
 - make the Tx time-out retries configurable with 1s granularity

Signed-off-by: Ong Boon Leong <boon.leong.ong@intel.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20211230035447.523177-7-boon.leong.ong@intel.com
2022-01-05 17:53:24 -08:00
Ong Boon Leong fa24d0b1d5 samples/bpf: xdpsock: Add sched policy and priority support
By default, TX schedule policy is SCHED_OTHER (round-robin time-sharing).
To improve TX cyclic scheduling, we add SCHED_FIFO policy and its priority
by using -W FIFO or --policy=FIFO and -U <PRIO> or --schpri=<PRIO>.

A) From xdpsock --app-stats, for SCHED_OTHER policy:
   $ xdpsock -i eth0 -t -N -z -T 1000 -b 16 -C 100000 -a

                      period     min        ave        max        cycle
   Cyclic TX          1000000    53507      75334      712642     6250

B) For SCHED_FIFO policy and schpri=50:
   $ xdpsock -i eth0 -t -N -z -T 1000 -b 16 -C 100000 -a -W FIFO -U 50

                      period     min        ave        max        cycle
   Cyclic TX          1000000    3699       24859      54397      6250

Signed-off-by: Ong Boon Leong <boon.leong.ong@intel.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20211230035447.523177-6-boon.leong.ong@intel.com
2022-01-05 17:53:24 -08:00
Ong Boon Leong fa0d27a1d5 samples/bpf: xdpsock: Add cyclic TX operation capability
Tx cycle time is in micro-seconds unit. By combining the batch size (-b M)
and Tx cycle time (-T|--tx-cycle N), xdpsock now can transmit batch-size of
packets every N-us periodically. Cyclic TX operation is not applicable if
--poll mode is used.

To transmit 16 packets every 1ms cycle time for total of 100000 packets
silently:
 $ xdpsock -i eth0 -T -N -z -T 1000 -b 16 -C 100000

To print cyclic TX schedule variance stats, use --app-stats|-a:
 $ xdpsock -i eth0 -T -N -z -T 1000 -b 16 -C 100000 -a

 sock0@eth0:0 txonly xdp-drv
                   pps            pkts           0.00
rx                 0              0
tx                 0              100000

                   calls/s        count
rx empty polls     0              0
fill fail polls    0              0
copy tx sendtos    0              0
tx wakeup sendtos  0              6254
opt polls          0              0

                   period     min        ave        max        cycle
Cyclic TX          1000000    53507      75334      712642     6250

Signed-off-by: Ong Boon Leong <boon.leong.ong@intel.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20211230035447.523177-5-boon.leong.ong@intel.com
2022-01-05 17:53:24 -08:00
Ong Boon Leong 5a3882542a samples/bpf: xdpsock: Add clockid selection support
User specifies the clock selection by using -w CLOCK or --clock=CLOCK
where CLOCK=[REALTIME, TAI, BOOTTIME, MONOTONIC].

The default CLOCK selection is MONOTONIC.

The implementation of clock selection parsing is borrowed from
iproute2/tc/q_taprio.c

Signed-off-by: Ong Boon Leong <boon.leong.ong@intel.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20211230035447.523177-4-boon.leong.ong@intel.com
2022-01-05 17:53:24 -08:00
Ong Boon Leong 6440a6c23f samples/bpf: xdpsock: Add Dest and Src MAC setting for Tx-only operation
To set Dest MAC address (-G|--tx-dmac) only:
 $ xdpsock -i eth0 -t -N -z -G aa:bb:cc:dd:ee:ff

To set Source MAC address (-H|--tx-smac) only:
 $ xdpsock -i eth0 -t -N -z -H 11:22:33:44:55:66

To set both Dest and Source MAC address:
 $ xdpsock -i eth0 -t -N -z -G aa:bb:cc:dd:ee:ff \
   -H 11:22:33:44:55:66

The default Dest and Source MAC address remain the same as before.

Signed-off-by: Ong Boon Leong <boon.leong.ong@intel.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Song Liu <songliubraving@fb.com>
Acked-by: Jesper Dangaard Brouer <brouer@redhat.com>
Link: https://lore.kernel.org/bpf/20211230035447.523177-3-boon.leong.ong@intel.com
2022-01-05 17:53:24 -08:00
Ong Boon Leong 2741a0493c samples/bpf: xdpsock: Add VLAN support for Tx-only operation
In multi-queue environment testing, the support for VLAN-tag based
steering is useful. So, this patch adds the capability to add
VLAN tag (VLAN ID and Priority) to the generated Tx frame.

To set the VLAN ID=10 and Priority=2 for Tx only through TxQ=3:
 $ xdpsock -i eth0 -t -N -z -q 3 -V -J 10 -K 2

If VLAN ID (-J) and Priority (-K) is set, it default to
  VLAN ID = 1
  VLAN Priority = 0.

For example, VLAN-tagged Tx only, xdp copy mode through TxQ=1:
 $ xdpsock -i eth0 -t -N -c -q 1 -V

Signed-off-by: Ong Boon Leong <boon.leong.ong@intel.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Song Liu <songliubraving@fb.com>
Link: https://lore.kernel.org/bpf/20211230035447.523177-2-boon.leong.ong@intel.com
2022-01-05 17:53:24 -08:00
Jakub Kicinski 4e023b44d5 Merge branch 'net-lantiq_xrx200-improve-ethernet-performance'
Aleksander Jan Bajkowski says:

====================
net: lantiq_xrx200: improve ethernet performance

This patchset improves Ethernet performance by 15%.

NAT Performance results on BT Home Hub 5A (kernel 5.10.89, mtu 1500):

	Down		Up
Before	539 Mbps	599 Mbps
After	624 Mbps	695 Mbps
====================

Link: https://lore.kernel.org/r/20220104151144.181736-1-olek2@wp.pl
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2022-01-05 17:18:07 -08:00
Aleksander Jan Bajkowski e015593573 net: lantiq_xrx200: convert to build_skb
We can increase the efficiency of rx path by using buffers to receive
packets then build SKBs around them just before passing into the network
stack. In contrast, preallocating SKBs too early reduces CPU cache
efficiency.

NAT Performance results on BT Home Hub 5A (kernel 5.10.89, mtu 1500):

	Down		Up
Before	577 Mbps	648 Mbps
After	624 Mbps	695 Mbps

Signed-off-by: Aleksander Jan Bajkowski <olek2@wp.pl>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2022-01-05 17:18:04 -08:00
Aleksander Jan Bajkowski 768818d772 net: lantiq_xrx200: increase napi poll weigth
NAT Performance results on BT Home Hub 5A (kernel 5.10.89, mtu 1500):

	Down		Up
Before	545 Mbps	625 Mbps
After	577 Mbps	648 Mbps

Signed-off-by: Aleksander Jan Bajkowski <olek2@wp.pl>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2022-01-05 17:18:04 -08:00
Aleksander Jan Bajkowski 5112e9234b MIPS: lantiq: dma: increase descritor count
NAT Performance results on BT Home Hub 5A (kernel 5.10.89, mtu 1500):

	Down		Up
Before	539 Mbps	599 Mbps
After	545 Mbps	625 Mbps

Signed-off-by: Aleksander Jan Bajkowski <olek2@wp.pl>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2022-01-05 17:18:03 -08:00
Miroslav Lichvar 87eee9c558 testptp: set pin function before other requests
When the -L option of the testptp utility is specified with other
options (e.g. -p to enable PPS output), the user probably wants to
apply it to the pin configured by the -L option.

Reorder the code to set the pin function before other function requests
to avoid confusing users.

Signed-off-by: Miroslav Lichvar <mlichvar@redhat.com>
Reviewed-by: Vladimir Oltean <olteanv@gmail.com>
Acked-by: Richard Cochran <richardcochran@gmail.com>
Link: https://lore.kernel.org/r/20220105152506.3256026-1-mlichvar@redhat.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2022-01-05 17:08:58 -08:00
Jakub Kicinski 502a2ce9cd linux-can-fixes-for-5.16-20220105
-----BEGIN PGP SIGNATURE-----
 
 iQFHBAABCgAxFiEEK3kIWJt9yTYMP3ehqclaivrt76kFAmHWBHkTHG1rbEBwZW5n
 dXRyb25peC5kZQAKCRCpyVqK+u3vqaY0CACb2NOHCl09a20N6REAmKzdXEq6LO0C
 52fSdeAhRffWh7GqlQED7+XgzuGimsQ5DYWMzP1dzCCVuI/f1gZwrMKJU/e3rZjE
 wR9VgIwTUulauW1KMUz/R8qJP5R7xq5wKXflm9iPcDo7buzTDf441vTt8WhQILIB
 ajl94ZSzhpy4yNk+YBJObanMALRTSjBPrIlEjzDdsex/IjkPDkTm8NYe7SRWzMf3
 UQR6rEAIecKBwnl0Q3bwcOU5GL8j333Xem2bxDsnm93tjfBwEtBfBX6XM1UX2B2L
 9WooLHmJHibkmp/1vNMzlGvi9crdTrMhWnFweTaAsr29yG3JZ4r2PytO
 =bCV1
 -----END PGP SIGNATURE-----

Merge tag 'linux-can-fixes-for-5.16-20220105' of git://git.kernel.org/pub/scm/linux/kernel/git/mkl/linux-can

Marc Kleine-Budde says:

====================
pull-request: can 2022-01-05

It consists of 2 patches, both by me. The first one fixes the use of
an uninitialized variable in the gs_usb driver the other one a
skb_over_panic in the ISOTP stack in case of reception of too large
ISOTP messages.

* tag 'linux-can-fixes-for-5.16-20220105' of git://git.kernel.org/pub/scm/linux/kernel/git/mkl/linux-can:
  can: isotp: convert struct tpcon::{idx,len} to unsigned int
  can: gs_usb: fix use of uninitialized variable, detach device on reception of invalid USB data
====================

Link: https://lore.kernel.org/r/20220105205443.1274709-1-mkl@pengutronix.de
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2022-01-05 17:04:32 -08:00
Olof Johansson 8922bb6526 SoCFPGA dts updates for v5.16, part 3
- Change the SoCFPGA compatible to "intel,socfpga-qspi"
 - Update dt-bindings document to include "intel,socfpga-qspi"
 -----BEGIN PGP SIGNATURE-----
 
 iQJIBAABCgAyFiEEoHhMeiyk5VmwVMwNGZQEC4GjKPQFAmHJlCQUHGRpbmd1eWVu
 QGtlcm5lbC5vcmcACgkQGZQEC4GjKPQIvQ//dpNgaJ6yCM25J8280ip8ry2vZULV
 0+q+KCud3k3Qd8YRzy1iHShkacDaVWJCn9vaQSwV8ertnnOMVnPZJiZjcQExMte9
 lHAuwSDxEd5/hI19H4DMCqZv5xKUC6o4m2N++MrZLLtruv5K8sw2CFfC+TaqgPdi
 JK/JBj1M44tm8CzxqwPTr5abw4OqdFGgaGuK6ZNcuLe70gYJaWBo9UqUbm3efkX6
 HzrtfqykxTNwrUCtWGew/vNrTznhHMo+xz6D6fHJj5UvEBUthIkfoz2GJ/tjX8od
 qBVKE0GiyGsWjuOOTrtVAkhIfv0D3eTYWFl+6uE0J3IdpT7hq09VoRFysMznrv04
 N5T2fOpeuISNVKMPvmBIf0t0HDG5VCcEqu4rRYLEQqiuCYnd0H3Ho1DcZJKlRi5f
 naAkBIabmyMkmqAnLVmP+Dg/AMayswIXGCpLgxqXF2ucGF0k3sq7K+ZwsZxjVKkz
 QBV1elr0NBuFjThDjrtvm6pYJJTR9K8PCuwqlOMIy2OQRyFG+NoSGZrNO2nzmjIe
 JUWUsq1wUhE+EQ7ShlTU82uAbwfDsFR6L0laMr2HmhfKVJOOXZSU0/CTca5p1TgN
 0WBrxkHyYGjc+gFTszXxfOpDLmNXqkdSie4WTYOkAa+DBwrZu15xcdLnEepg8c6U
 FR5QWhvaSDA6qSs=
 =ErOv
 -----END PGP SIGNATURE-----
gpgsig -----BEGIN PGP SIGNATURE-----
 
 iQJDBAABCgAtFiEElf+HevZ4QCAJmMQ+jBrnPN6EHHcFAmHWNWsPHG9sb2ZAbGl4
 b20ubmV0AAoJEIwa5zzehBx3QXsP/RMpoaAC6Posg0d8N3IzwzWKf2z3uvatL7sZ
 fuj9BiduSOsSXs5wQcYyFvCXzWif8C3TgUxGnhp589ZlZVC2Mdy8rJurpg2qvRO2
 JSko7Jq0u+ct+YQef1JsS0QcCznF6pmUUoFMrfcVeThLZEJK2J+JXNyx5EbeOHts
 8Q2Tz4+UkI/4985xqrTE+WPonnPjgk8pyIQDUpQuFmMaiFE1MUKRZtBbBYZ2Wigx
 yN9GMwRVx6op+dKZa8V1tyiW/Ls7Jj7BVi3M98X0VHcf1kD2vchERVUvOQtMR4gx
 SU+AhrjE8o17D8PfPpUCB1MKzWtRkzlcRypLgCG/BrVIIXtnSIDH5X/XRTZx8vqD
 CFtdo13EbDai/7tuYPNAYXG/JsIR5uFMuV4gMhQmdyo5hqgSeQFA1AsrV6T9xBVo
 6P2Gy71S08BCLnP51tzUeegEQgu29W5aEbiumEYQvPTxNCq4AWLGG/z9phl3aMxq
 nZVT5qfipH0IE01euExAaA4Fx5K6cbzAJCpZp+z8+sJiB47Yer2p6dxgGhyA0DMh
 3DRN9NaR/3R7AoxJThmKixEVSolmihR3rc4ftaGs2lttgJmj5W80QT9biDB5KyL8
 BbJTymPQDlml3WnV7oxPGcFKUTaAzzr+K+PicbbZ/8m/91KLCsTt7NUPQdq0LAu6
 L8qsgVLO
 =p4VG
 -----END PGP SIGNATURE-----

Merge tag 'socfpga_fix_for_v5.16_part_3' of git://git.kernel.org/pub/scm/linux/kernel/git/dinguyen/linux into arm/fixes

SoCFPGA dts updates for v5.16, part 3
- Change the SoCFPGA compatible to "intel,socfpga-qspi"
- Update dt-bindings document to include "intel,socfpga-qspi"

* tag 'socfpga_fix_for_v5.16_part_3' of git://git.kernel.org/pub/scm/linux/kernel/git/dinguyen/linux: (361 commits)
  ARM: dts: socfpga: change qspi to "intel,socfpga-qspi"
  dt-bindings: spi: cadence-quadspi: document "intel,socfpga-qspi"
  Linux 5.16-rc7
  mm/hwpoison: clear MF_COUNT_INCREASED before retrying get_any_page()
  mm/damon/dbgfs: protect targets destructions with kdamond_lock
  mm/page_alloc: fix __alloc_size attribute for alloc_pages_exact_nid
  mm: delete unsafe BUG from page_cache_add_speculative()
  mm, hwpoison: fix condition in free hugetlb page path
  MAINTAINERS: mark more list instances as moderated
  kernel/crash_core: suppress unknown crashkernel parameter warning
  mm: mempolicy: fix THP allocations escaping mempolicy restrictions
  kfence: fix memory leak when cat kfence objects
  platform/x86: intel_pmc_core: fix memleak on registration failure
  net: stmmac: dwmac-visconti: Fix value of ETHER_CLK_SEL_FREQ_SEL_2P5M
  r8152: sync ocp base
  r8152: fix the force speed doesn't work for RTL8156
  net: bridge: fix ioctl old_deviceless bridge argument
  net: stmmac: ptp: fix potentially overflowing expression
  net: dsa: tag_ocelot: use traffic class to map priority on injected header
  veth: ensure skb entering GRO are not cloned.
  ...

Link: https://lore.kernel.org/r/20211227103644.566694-1-dinguyen@kernel.org
Signed-off-by: Olof Johansson <olof@lixom.net>
2022-01-05 16:18:50 -08:00
Olof Johansson fde9ec3c1b Reset controller fixes for v5.16, part 2
Fix pm_runtime_resume_and_get() error handling in the
 reset-rzg2l-usbphy-ctrl driver.
 -----BEGIN PGP SIGNATURE-----
 
 iI0EABYIADUWIQRRO6F6WdpH1R0vGibVhaclGDdiwAUCYdXQMRcccC56YWJlbEBw
 ZW5ndXRyb25peC5kZQAKCRDVhaclGDdiwCrJAQC/nW5YH9o0PuredqlUtha/Akpc
 jQmrDZOfHmrm8GOJiAD+OtRE1NHjgm6CXan0QYwa2Dbb+yYifvOZL/SLo5raAw8=
 =ScZk
 -----END PGP SIGNATURE-----
gpgsig -----BEGIN PGP SIGNATURE-----
 
 iQJDBAABCgAtFiEElf+HevZ4QCAJmMQ+jBrnPN6EHHcFAmHWNUkPHG9sb2ZAbGl4
 b20ubmV0AAoJEIwa5zzehBx3b64P+wSxOSOinbkaY/QSj57rZAHdt05OPWjQc0+p
 A6cicMiOpcF+dxRp9ZNOmAv/ViZXsHfSnZg15/tzvWT9bsQcekXHTaeCpm8d4Ku9
 LvUS2X1LtnTH/E+I1/No+W9ljJ3sEkmLFigAAh2I7gSAHrba/RQNhv4oMisw4pNP
 SLZRYuju5MLfPg4wQ8nfkq6rwkNuqfHKueyWKu0R9507oNzXzuzTQcpuZvRPVbWL
 Gpi6QRfQeC06tPM/9nbaPCaGeG12opio8rhNnuLrWqRMQHVUUbQ+VmBTM1ZlDhpv
 8UNkW2AqXUrf0ijopzKzzKLWB5iNhPtWav3T+oTppGvB4Z7jYHUwjtotlYqI6uAQ
 VqtGmObxfnY0xx+RpXWEGuCFCd43AhqdNpR0X4miyh06E0fGpgQWCEbQSDRep/iJ
 tbpDR2cfD7JB2m1asYD+72NHqe53UgCXvgt/xf+gpdR2I2BVcQxj0YfieSZvS5Hq
 FY9OTMl1xZE3SUIPi1eZmZAeqmKRaj7ZCKslgqD+pfnWU/+WEGI4f/M30h/7BAK0
 vNvnJ44T6ozPshCfiHNl2L0z6XC0bA4mvi4z7gnuXRAFA71I5Eg9s1mYqwG3UDZU
 /4JKkzr7Al6BdXZZtXjkDsCK+agyhSa95ywNbUi7XqtD/C0qybVQFfwGqJQ+/HH8
 6M2wIqZj
 =/Jsy
 -----END PGP SIGNATURE-----

Merge tag 'reset-fixes-for-v5.16-2' of git://git.pengutronix.de/pza/linux into arm/fixes

Reset controller fixes for v5.16, part 2

Fix pm_runtime_resume_and_get() error handling in the
reset-rzg2l-usbphy-ctrl driver.

* tag 'reset-fixes-for-v5.16-2' of git://git.pengutronix.de/pza/linux:
  reset: renesas: Fix Runtime PM usage
  reset: tegra-bpmp: Revert Handle errors in BPMP response

Link: https://lore.kernel.org/r/20220105172515.273947-1-p.zabel@pengutronix.de
Signed-off-by: Olof Johansson <olof@lixom.net>
2022-01-05 16:18:17 -08:00
Christy Lee 5f60826428 libbpf 1.0: Deprecate bpf_object__find_map_by_offset() API
API created with simplistic assumptions about BPF map definitions.
It hasn’t worked for a while, deprecate it in preparation for
libbpf 1.0.

  [0] Closes: https://github.com/libbpf/libbpf/issues/302

Signed-off-by: Christy Lee <christylee@fb.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20220105003120.2222673-1-christylee@fb.com
2022-01-05 16:11:32 -08:00
Christy Lee 9855c131b9 libbpf 1.0: Deprecate bpf_map__is_offload_neutral()
Deprecate bpf_map__is_offload_neutral(). It’s most probably broken
already. PERF_EVENT_ARRAY isn’t the only map that’s not suitable
for hardware offloading. Applications can directly check map type
instead.

  [0] Closes: https://github.com/libbpf/libbpf/issues/306

Signed-off-by: Christy Lee <christylee@fb.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20220105000601.2090044-1-christylee@fb.com
2022-01-05 16:09:06 -08:00
Naveen N. Rao f28439db47 tracing: Tag trace_percpu_buffer as a percpu pointer
Tag trace_percpu_buffer as a percpu pointer to resolve warnings
reported by sparse:
  /linux/kernel/trace/trace.c:3218:46: warning: incorrect type in initializer (different address spaces)
  /linux/kernel/trace/trace.c:3218:46:    expected void const [noderef] __percpu *__vpp_verify
  /linux/kernel/trace/trace.c:3218:46:    got struct trace_buffer_struct *
  /linux/kernel/trace/trace.c:3234:9: warning: incorrect type in initializer (different address spaces)
  /linux/kernel/trace/trace.c:3234:9:    expected void const [noderef] __percpu *__vpp_verify
  /linux/kernel/trace/trace.c:3234:9:    got int *

Link: https://lkml.kernel.org/r/ebabd3f23101d89cb75671b68b6f819f5edc830b.1640255304.git.naveen.n.rao@linux.vnet.ibm.com

Cc: stable@vger.kernel.org
Reported-by: kernel test robot <lkp@intel.com>
Fixes: 07d777fe8c ("tracing: Add percpu buffers for trace_printk()")
Signed-off-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
2022-01-05 18:53:49 -05:00
Naveen N. Rao 823e670f7e tracing: Fix check for trace_percpu_buffer validity in get_trace_buf()
With the new osnoise tracer, we are seeing the below splat:
    Kernel attempted to read user page (c7d880000) - exploit attempt? (uid: 0)
    BUG: Unable to handle kernel data access on read at 0xc7d880000
    Faulting instruction address: 0xc0000000002ffa10
    Oops: Kernel access of bad area, sig: 11 [#1]
    LE PAGE_SIZE=64K MMU=Radix SMP NR_CPUS=2048 NUMA pSeries
    ...
    NIP [c0000000002ffa10] __trace_array_vprintk.part.0+0x70/0x2f0
    LR [c0000000002ff9fc] __trace_array_vprintk.part.0+0x5c/0x2f0
    Call Trace:
    [c0000008bdd73b80] [c0000000001c49cc] put_prev_task_fair+0x3c/0x60 (unreliable)
    [c0000008bdd73be0] [c000000000301430] trace_array_printk_buf+0x70/0x90
    [c0000008bdd73c00] [c0000000003178b0] trace_sched_switch_callback+0x250/0x290
    [c0000008bdd73c90] [c000000000e70d60] __schedule+0x410/0x710
    [c0000008bdd73d40] [c000000000e710c0] schedule+0x60/0x130
    [c0000008bdd73d70] [c000000000030614] interrupt_exit_user_prepare_main+0x264/0x270
    [c0000008bdd73de0] [c000000000030a70] syscall_exit_prepare+0x150/0x180
    [c0000008bdd73e10] [c00000000000c174] system_call_vectored_common+0xf4/0x278

osnoise tracer on ppc64le is triggering osnoise_taint() for negative
duration in get_int_safe_duration() called from
trace_sched_switch_callback()->thread_exit().

The problem though is that the check for a valid trace_percpu_buffer is
incorrect in get_trace_buf(). The check is being done after calculating
the pointer for the current cpu, rather than on the main percpu pointer.
Fix the check to be against trace_percpu_buffer.

Link: https://lkml.kernel.org/r/a920e4272e0b0635cf20c444707cbce1b2c8973d.1640255304.git.naveen.n.rao@linux.vnet.ibm.com

Cc: stable@vger.kernel.org
Fixes: e2ace00117 ("tracing: Choose static tp_printk buffer by explicit nesting count")
Signed-off-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
2022-01-05 18:51:25 -05:00
Qiang Wang 51a33c60f1 libbpf: Support repeated legacy kprobes on same function
If repeated legacy kprobes on same function in one process,
libbpf will register using the same probe name and got -EBUSY
error. So append index to the probe name format to fix this
problem.

Co-developed-by: Chengming Zhou <zhouchengming@bytedance.com>
Signed-off-by: Qiang Wang <wangqiang.wq.frank@bytedance.com>
Signed-off-by: Chengming Zhou <zhouchengming@bytedance.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20211227130713.66933-2-wangqiang.wq.frank@bytedance.com
2022-01-05 15:38:21 -08:00
Qiang Wang 71cff670ba libbpf: Use probe_name for legacy kprobe
Fix a bug in commit 46ed5fc33d, which wrongly used the
func_name instead of probe_name to register legacy kprobe.

Fixes: 46ed5fc33d ("libbpf: Refactor and simplify legacy kprobe code")
Co-developed-by: Chengming Zhou <zhouchengming@bytedance.com>
Signed-off-by: Qiang Wang <wangqiang.wq.frank@bytedance.com>
Signed-off-by: Chengming Zhou <zhouchengming@bytedance.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Tested-by: Hengqi Chen <hengqi.chen@gmail.com>
Reviewed-by: Hengqi Chen <hengqi.chen@gmail.com>
Link: https://lore.kernel.org/bpf/20211227130713.66933-1-wangqiang.wq.frank@bytedance.com
2022-01-05 15:38:21 -08:00
Jiri Olsa 0daf5cb217 ftrace/samples: Add missing prototypes direct functions
There's another compilation fail (first here [1]) reported by kernel
test robot for W=1 clang build:

  >> samples/ftrace/ftrace-direct-multi-modify.c:7:6: warning: no previous
  prototype for function 'my_direct_func1' [-Wmissing-prototypes]
     void my_direct_func1(unsigned long ip)

Direct functions in ftrace direct sample modules need to have prototypes
defined. They are already global in order to be visible for the inline
assembly, so there's no problem.

The kernel test robot reported just error for ftrace-direct-multi-modify,
but I got same errors also for the rest of the modules touched by this patch.

[1] 67d4f6e3bf ftrace/samples: Add missing prototype for my_direct_func

Link: https://lkml.kernel.org/r/20211219135317.212430-1-jolsa@kernel.org

Reported-by: kernel test robot <lkp@intel.com>
Fixes: e1067a07cf ("ftrace/samples: Add module to test multi direct modify interface")
Fixes: ae0cc3b7e7 ("ftrace/samples: Add a sample module that implements modify_ftrace_direct()")
Fixes: 156473a0ff ("ftrace: Add another example of register_ftrace_direct() use case")
Fixes: b06457c83a ("ftrace: Add sample module that uses register_ftrace_direct()")
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
2022-01-05 18:34:50 -05:00
Christy Lee 7218c28c87 libbpf: Deprecate bpf_perf_event_read_simple() API
With perf_buffer__poll() and perf_buffer__consume() APIs available,
there is no reason to expose bpf_perf_event_read_simple() API to
users. If users need custom perf buffer, they could re-implement
the function.

Mark bpf_perf_event_read_simple() and move the logic to a new
static function so it can still be called by other functions in the
same file.

  [0] Closes: https://github.com/libbpf/libbpf/issues/310

Signed-off-by: Christy Lee <christylee@fb.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20211229204156.13569-1-christylee@fb.com
2022-01-05 15:27:43 -08:00