/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _GEN_CNA_LOCK_SLOWPATH
#error "do not include this file"
#endif

#include <linux/topology.h>
#include <linux/sched/clock.h>
#include <linux/sched/rt.h>
#include <linux/moduleparam.h>
#include <linux/random.h>

/*
 * Implement a NUMA-aware version of MCS (aka CNA, or compact NUMA-aware lock).
 *
 * In CNA, spinning threads are organized in two queues, a primary queue for
 * threads running on the same NUMA node as the current lock holder, and a
 * secondary queue for threads running on other nodes. Schematically, it
 * looks like this:
 *
 *     cna_node
 *   +----------+     +--------+         +--------+
 *   |mcs:next  | --> |mcs:next| --> ... |mcs:next| --> NULL  [Primary queue]
 *   |mcs:locked| -.  +--------+         +--------+
 *   +----------+  |
 *                 `----------------------.
 *                                        v
 *                 +--------+         +--------+
 *                 |mcs:next| --> ... |mcs:next|  [Secondary queue]
 *                 +--------+         +--------+
 *                      ^                    |
 *                      `--------------------'
 *
 * N.B. locked := 1 if secondary queue is absent. Otherwise, it contains the
 * encoded pointer to the tail of the secondary queue, which is organized as a
 * circular list.
 *
 * After acquiring the MCS lock and before acquiring the spinlock, the MCS lock
 * holder checks whether the next waiter in the primary queue (if one exists)
 * is running on the same NUMA node. If it is not, that waiter is detached from
 * the main queue and moved into the tail of the secondary queue. This way, we
 * gradually filter the primary queue, leaving only waiters running on the same
 * preferred NUMA node.
 *
 * For more details, see https://arxiv.org/abs/1810.05600.
 *
 * Authors: Alex Kogan <alex.kogan@oracle.com>
 *          Dave Dice <dave.dice@oracle.com>
 */

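/*
 * Informal summary of how CNA reuses the mcs.locked field, derived from the
 * description above (added commentary, not part of the original code):
 *
 *   locked == 0 : waiter is still spinning, the lock was not handed off yet
 *   locked == 1 : lock handed off, secondary queue is empty
 *   locked >  1 : lock handed off, value is the encoded tail of the circular
 *                 secondary queue (hence encoded_tail must never be 0 or 1)
 */
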
#define FLUSH_SECONDARY_QUEUE	1

#define CNA_PRIORITY_NODE	0xffff

#define DEFAULT_LLC_ID		0xfffe

struct cna_node {
	struct mcs_spinlock	mcs;
	u16			llc_id;
	u16			real_llc_id;
	u32			encoded_tail;	/* self */
	u64			start_time;
};

bool use_llc_spinlock = false;

static ulong numa_spinlock_threshold_ns = 1000000;	/* 1ms, by default */
module_param(numa_spinlock_threshold_ns, ulong, 0644);

static inline bool intra_node_threshold_reached(struct cna_node *cn)
{
	u64 current_time = local_clock();
	u64 threshold = cn->start_time + numa_spinlock_threshold_ns;

	return current_time > threshold;
}

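/*
 * Worked example (added commentary, not from the original source): with the
 * default numa_spinlock_threshold_ns of 1,000,000 ns, once more than ~1 ms
 * has passed since cn->start_time (i.e. since the secondary queue was created
 * or the preference was inherited at handoff), intra_node_threshold_reached()
 * returns true and cna_wait_head_or_lock() below requests a flush of the
 * secondary queue, bounding how long remote waiters can be passed over.
 */
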
/*
 * Controls the probability for enabling the ordering of the main queue
 * when the secondary queue is empty. The chosen value reduces the amount
 * of unnecessary shuffling of threads between the two waiting queues
 * when the contention is low, while responding fast enough and enabling
 * the shuffling when the contention is high.
 */
#define SHUFFLE_REDUCTION_PROB_ARG	(7)

/* Per-CPU pseudo-random number seed */
static DEFINE_PER_CPU(u32, seed);

/*
 * Return false with probability 1 / 2^@num_bits.
 * Intuitively, the larger @num_bits the less likely false is to be returned.
 * @num_bits must be a number between 0 and 31.
 */
static bool probably(unsigned int num_bits)
{
	u32 s;

	s = this_cpu_read(seed);
	s = next_pseudo_random32(s);
	this_cpu_write(seed, s);

	return s & ((1 << num_bits) - 1);
}

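/*
 * Worked example (added commentary): with SHUFFLE_REDUCTION_PROB_ARG == 7,
 * probably(7) masks the pseudo-random value with 0x7f, so it returns false
 * roughly once every 2^7 = 128 calls. In cna_wait_head_or_lock() below this
 * means that, while the secondary queue is empty, the queue-ordering work is
 * skipped about 127 out of 128 times.
 */
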
static void __init cna_init_nodes_per_cpu(unsigned int cpu)
{
	struct mcs_spinlock *base = per_cpu_ptr(&qnodes[0].mcs, cpu);
	int i;

	for (i = 0; i < MAX_NODES; i++) {
		struct cna_node *cn = (struct cna_node *)grab_mcs_node(base, i);

		/*
		 * cpu_llc_id is not yet initialized when this function is
		 * called, so just set a fake LLC id for now.
		 */
		cn->real_llc_id = DEFAULT_LLC_ID;
		cn->encoded_tail = encode_tail(cpu, i);
		/*
		 * make sure @encoded_tail is not confused with other valid
		 * values for @locked (0 or 1)
		 */
		WARN_ON(cn->encoded_tail <= 1);
	}
}

/*
 * must be called after cpu_llc_id is initialized.
 */
void cna_set_llc_id_per_cpu(unsigned int cpu)
{
	struct mcs_spinlock *base = per_cpu_ptr(&qnodes[0].mcs, cpu);
	int i;

	if (!use_llc_spinlock)
		return;

	for (i = 0; i < MAX_NODES; i++) {
		struct cna_node *cn = (struct cna_node *)grab_mcs_node(base, i);

		cn->real_llc_id = per_cpu(cpu_llc_id, cpu);
	}
}

static int __init cna_init_nodes(void)
{
	unsigned int cpu;

	/*
	 * this will break on 32bit architectures, so we restrict
	 * the use of CNA to 64bit only (see arch/x86/Kconfig)
	 */
	BUILD_BUG_ON(sizeof(struct cna_node) > sizeof(struct qnode));
	/* we store an encoded tail word in the node's @locked field */
	BUILD_BUG_ON(sizeof(u32) > sizeof(unsigned int));

	for_each_possible_cpu(cpu)
		cna_init_nodes_per_cpu(cpu);

	return 0;
}

static __always_inline void cna_init_node(struct mcs_spinlock *node)
{
	bool priority = !in_task() || irqs_disabled() || rt_task(current);
	struct cna_node *cn = (struct cna_node *)node;

	cn->llc_id = priority ? CNA_PRIORITY_NODE : cn->real_llc_id;
	cn->start_time = 0;
}

/*
 * cna_splice_head -- splice the entire secondary queue onto the head of the
 * primary queue.
 *
 * Returns the new primary head node or NULL on failure.
 */
static struct mcs_spinlock *
cna_splice_head(struct qspinlock *lock, u32 val,
		struct mcs_spinlock *node, struct mcs_spinlock *next)
{
	struct mcs_spinlock *head_2nd, *tail_2nd;
	u32 new;

	tail_2nd = decode_tail(node->locked);
	head_2nd = tail_2nd->next;

	if (next) {
		/*
		 * If the primary queue is not empty, the primary tail doesn't
		 * need to change and we can simply link the secondary tail to
		 * the old primary head.
		 */
		tail_2nd->next = next;
	} else {
		/*
		 * When the primary queue is empty, the secondary tail becomes
		 * the primary tail.
		 */

		/*
		 * Speculatively break the secondary queue's circular link such
		 * that when the secondary tail becomes the primary tail it all
		 * works out.
		 */
		tail_2nd->next = NULL;

		/*
		 * tail_2nd->next = NULL;	old = xchg_tail(lock, tail);
		 *				prev = decode_tail(old);
		 * try_cmpxchg_release(...);	WRITE_ONCE(prev->next, node);
		 *
		 * If the following cmpxchg() succeeds, our stores will not
		 * collide.
		 */
		new = ((struct cna_node *)tail_2nd)->encoded_tail |
			_Q_LOCKED_VAL;
		if (!atomic_try_cmpxchg_release(&lock->val, &val, new)) {
			/* Restore the secondary queue's circular link. */
			tail_2nd->next = head_2nd;
			return NULL;
		}
	}

	/* The primary queue head now is what was the secondary queue head. */
	return head_2nd;
}

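/*
 * Illustrative sketch of the empty-primary case above (added commentary,
 * hypothetical waiters). Suppose the secondary queue holds A -> B, kept
 * circular via B->next == A, with node->locked encoding B:
 *
 *   tail_2nd == B, head_2nd == A
 *   B->next is cleared, the lock's tail is moved from 'node' to B by the
 *   cmpxchg, and A (the returned head) becomes the new primary queue head.
 */
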
static inline bool cna_try_clear_tail(struct qspinlock *lock, u32 val,
				      struct mcs_spinlock *node)
{
	/*
	 * We're here because the primary queue is empty; check the secondary
	 * queue for remote waiters.
	 */
	if (node->locked > 1) {
		struct mcs_spinlock *next;

		/*
		 * When there are waiters on the secondary queue, try to move
		 * them back onto the primary queue and let them rip.
		 */
		next = cna_splice_head(lock, val, node, NULL);
		if (next) {
			arch_mcs_lock_handoff(&next->locked, 1);
			return true;
		}

		return false;
	}

	/* Both queues are empty. Do what MCS does. */
	return __try_clear_tail(lock, val, node);
}

/*
 * cna_splice_next -- splice the next node from the primary queue onto
 * the secondary queue.
 */
static void cna_splice_next(struct mcs_spinlock *node,
			    struct mcs_spinlock *next,
			    struct mcs_spinlock *nnext)
{
	/* remove 'next' from the main queue */
	node->next = nnext;

	/* stick `next` on the secondary queue tail */
	if (node->locked <= 1) { /* if secondary queue is empty */
		struct cna_node *cn = (struct cna_node *)node;

		/* create secondary queue */
		next->next = next;

		cn->start_time = local_clock();
		/* secondary queue is not empty iff start_time != 0 */
		WARN_ON(!cn->start_time);
	} else {
		/* add to the tail of the secondary queue */
		struct mcs_spinlock *tail_2nd = decode_tail(node->locked);
		struct mcs_spinlock *head_2nd = tail_2nd->next;

		tail_2nd->next = next;
		next->next = head_2nd;
	}

	node->locked = ((struct cna_node *)next)->encoded_tail;
}

/*
 * cna_order_queue - check whether the next waiter in the main queue is on
 * the same NUMA node as the lock holder; if not, and it has a waiter behind
 * it in the main queue, move the former onto the secondary queue.
 * Returns 1 if the next waiter runs on the same NUMA node (or is a priority
 * waiter); 0 otherwise.
 */
static int cna_order_queue(struct mcs_spinlock *node)
{
	struct mcs_spinlock *next = READ_ONCE(node->next);
	struct cna_node *cn = (struct cna_node *)node;
	int llc_id, next_llc_id;

	if (!next)
		return 0;

	llc_id = cn->llc_id;
	next_llc_id = ((struct cna_node *)next)->llc_id;

	if (next_llc_id != llc_id && next_llc_id != CNA_PRIORITY_NODE) {
		struct mcs_spinlock *nnext = READ_ONCE(next->next);

		if (nnext)
			cna_splice_next(node, next, nnext);

		return 0;
	}
	return 1;
}

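/*
 * Example walk-through (added commentary, hypothetical waiters): the lock
 * holder's llc_id is X and the primary queue is  holder -> W1(llc Y) -> W2.
 * cna_order_queue() sees that W1 is on a different LLC and has W2 behind it,
 * so cna_splice_next() moves W1 to the secondary queue and the primary queue
 * becomes  holder -> W2. If W2 were absent, W1 would be left in place,
 * presumably so the primary queue is never emptied just to reorder it.
 */
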
#define LOCK_IS_BUSY(lock) (atomic_read(&(lock)->val) & _Q_LOCKED_PENDING_MASK)

/* Abuse the pv_wait_head_or_lock() hook to get some work done */
static __always_inline u32 cna_wait_head_or_lock(struct qspinlock *lock,
						 struct mcs_spinlock *node)
{
	struct cna_node *cn = (struct cna_node *)node;

	if (node->locked <= 1 && probably(SHUFFLE_REDUCTION_PROB_ARG)) {
		/*
		 * When the secondary queue is empty, skip the calls to
		 * cna_order_queue() below with high probability. This
		 * optimization reduces the overhead of unnecessary shuffling
		 * of threads between waiting queues when the lock is only
		 * lightly contended.
		 */
		return 0;
	}

	if (!cn->start_time || !intra_node_threshold_reached(cn)) {
		/*
		 * We are at the head of the wait queue, no need to use
		 * the fake NUMA node ID.
		 */
		if (cn->llc_id == CNA_PRIORITY_NODE)
			cn->llc_id = cn->real_llc_id;

		/*
		 * Try and put the time otherwise spent spin waiting on
		 * _Q_LOCKED_PENDING_MASK to use by sorting our lists.
		 */
		while (LOCK_IS_BUSY(lock) && !cna_order_queue(node))
			cpu_relax();
	} else {
		cn->start_time = FLUSH_SECONDARY_QUEUE;
	}

	return 0; /* we lied; we didn't wait, go do so now */
}

static inline void cna_lock_handoff(struct mcs_spinlock *node,
				    struct mcs_spinlock *next)
{
	struct cna_node *cn = (struct cna_node *)node;
	u32 val = 1;

	if (cn->start_time != FLUSH_SECONDARY_QUEUE) {
		if (node->locked > 1) {
			val = node->locked;	/* preserve secondary queue */

			/*
			 * We have a local waiter, either real or fake one;
			 * reload @next in case it was changed by
			 * cna_order_queue().
			 */
			next = node->next;

			/*
			 * Pass over NUMA node id of primary queue, to maintain
			 * the preference even if the next waiter is on a
			 * different node.
			 */
			((struct cna_node *)next)->llc_id = cn->llc_id;

			((struct cna_node *)next)->start_time = cn->start_time;
		}
	} else {
		/*
		 * We decided to flush the secondary queue;
		 * this can only happen if that queue is not empty.
		 */
		WARN_ON(node->locked <= 1);
		/*
		 * Splice the secondary queue onto the primary queue and pass
		 * the lock to the longest waiting remote waiter.
		 */
		next = cna_splice_head(NULL, 0, node, next);
	}

	arch_mcs_lock_handoff(&next->locked, val);
}

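/*
 * Summary of the handoff value (added commentary): the next waiter's @locked
 * field receives either 1 (secondary queue empty, or it was just flushed by
 * cna_splice_head() above) or the preserved encoded tail of the secondary
 * queue; in the latter case the new lock holder also inherits the LLC
 * preference and start_time from the previous holder.
 */
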
/*
 * Constant (boot-param configurable) flag selecting the LLC-aware variant
 * of spinlock. Possible values: -1 (off, default) / 0 (auto) / 1 (on).
 */
static int llc_spinlock_flag = -1;

static int __init llc_spinlock_setup(char *str)
{
	if (!strcmp(str, "auto")) {
		llc_spinlock_flag = 0;
		return 1;
	} else if (!strcmp(str, "on")) {
		llc_spinlock_flag = 1;
		return 1;
	} else if (!strcmp(str, "off")) {
		llc_spinlock_flag = -1;
		return 1;
	}

	return 0;
}
__setup("llc_spinlock=", llc_spinlock_setup);

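/*
 * Usage note (added commentary): the variant is selected on the kernel
 * command line, e.g.
 *
 *   llc_spinlock=on    force-enable the CNA/LLC-aware slow path
 *   llc_spinlock=auto  enable it only if the native slow path is still in
 *                      use (i.e. not overridden by the paravirt slow path)
 *   llc_spinlock=off   keep the default native slow path
 *
 * With no parameter the flag stays at -1 and
 * cna_configure_spin_lock_slowpath() below leaves the slow path untouched.
 */
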
void __cna_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val);

/*
 * Switch to the LLC-friendly slow path for spinlocks when requested via the
 * llc_spinlock= boot parameter. In "auto" mode this is done only if the
 * native slow path is still in use, i.e. it has not been overridden by the
 * paravirt slow path.
 */
void __init cna_configure_spin_lock_slowpath(void)
{
	if (llc_spinlock_flag < 0)
		return;

	if (llc_spinlock_flag == 0 &&
	    pv_ops.lock.queued_spin_lock_slowpath !=
			native_queued_spin_lock_slowpath)
		return;

	cna_init_nodes();

	use_llc_spinlock = true;

	pv_ops.lock.queued_spin_lock_slowpath = __cna_queued_spin_lock_slowpath;

	pr_info("Enabling CNA spinlock\n");
}