2013-09-03 02:58:20 +08:00
|
|
|
#include <linux/export.h>
|
|
|
|
#include <linux/lockref.h>
|
|
|
|
|
lockref: implement lockless reference count updates using cmpxchg()
Instead of taking the spinlock, the lockless versions atomically check
that the lock is not taken, and do the reference count update using a
cmpxchg() loop. This is semantically identical to doing the reference
count update protected by the lock, but avoids the "wait for lock"
contention that you get when accesses to the reference count are
contended.
Note that a "lockref" is absolutely _not_ equivalent to an atomic_t.
Even when the lockref reference counts are updated atomically with
cmpxchg, the fact that they also verify the state of the spinlock means
that the lockless updates can never happen while somebody else holds the
spinlock.
So while "lockref_put_or_lock()" looks a lot like just another name for
"atomic_dec_and_lock()", and both optimize to lockless updates, they are
fundamentally different: the decrement done by atomic_dec_and_lock() is
truly independent of any lock (as long as it doesn't decrement to zero),
so a locked region can still see the count change.
The lockref structure, in contrast, really is a *locked* reference
count. If you hold the spinlock, the reference count will be stable and
you can modify the reference count without using atomics, because even
the lockless updates will see and respect the state of the lock.
In order to enable the cmpxchg lockless code, the architecture needs to
do three things:
(1) Make sure that the "arch_spinlock_t" and an "unsigned int" can fit
in an aligned u64, and have a "cmpxchg()" implementation that works
on such a u64 data type.
(2) define a helper function to test for a spinlock being unlocked
("arch_spin_value_unlocked()")
(3) select the "ARCH_USE_CMPXCHG_LOCKREF" config variable in its
Kconfig file.
This enables it for x86-64 (but not 32-bit, we'd need to make sure
cmpxchg() turns into the proper cmpxchg8b in order to enable it for
32-bit mode).
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2013-09-03 03:12:15 +08:00
|
|
|
#ifdef CONFIG_CMPXCHG_LOCKREF

/*
 * CMPXCHG_LOOP - lockless update of a lockref through a cmpxchg() loop.
 *
 * Must be expanded where a "struct lockref *lockref" is in scope.
 *
 * The combined lock+count word is sampled into "old".  While the spinlock
 * inside that sample reads as unlocked, CODE runs with "new" set up as a
 * copy of "old"; CODE adjusts "new" and may "break" out of the loop or
 * "return" from the enclosing function.  The new word is then installed
 * with cmpxchg(), and SUCCESS runs if memory still held the sampled word.
 *
 * Because the whole word (lock state included) is compared, the lockless
 * update cannot go through once somebody else has taken the spinlock.
 *
 * Note that the "cmpxchg()" reloads the "old" value for the failure
 * case, so a retry works on the word currently in memory.
 *
 * When the sample shows the lock held the loop ends without running
 * SUCCESS, and execution continues after the macro.
 */
#define CMPXCHG_LOOP(CODE, SUCCESS) do {					\
	struct lockref old;							\
	/* lock and count together must be exactly one 64-bit word */		\
	BUILD_BUG_ON(sizeof(old) != 8);						\
	old.lock_count = ACCESS_ONCE(lockref->lock_count);			\
	while (likely(arch_spin_value_unlocked(old.lock.rlock.raw_lock))) {	\
		struct lockref new = old;					\
		struct lockref prev = old;					\
		CODE								\
		old.lock_count = cmpxchg(&lockref->lock_count,			\
					 old.lock_count, new.lock_count);	\
		if (likely(old.lock_count == prev.lock_count)) {		\
			SUCCESS;						\
		}								\
		cpu_relax();							\
	}									\
} while (0)

#else

/* Without cmpxchg support there is no fast path: expand to nothing. */
#define CMPXCHG_LOOP(CODE, SUCCESS) do { } while (0)

#endif
|
|
|
|
|
2013-09-03 02:58:20 +08:00
|
|
|
/**
|
|
|
|
* lockref_get - Increments reference count unconditionally
|
|
|
|
* @lockcnt: pointer to lockref structure
|
|
|
|
*
|
|
|
|
* This operation is only valid if you already hold a reference
|
|
|
|
* to the object, so you know the count cannot be zero.
|
|
|
|
*/
|
|
|
|
void lockref_get(struct lockref *lockref)
|
|
|
|
{
|
lockref: implement lockless reference count updates using cmpxchg()
Instead of taking the spinlock, the lockless versions atomically check
that the lock is not taken, and do the reference count update using a
cmpxchg() loop. This is semantically identical to doing the reference
count update protected by the lock, but avoids the "wait for lock"
contention that you get when accesses to the reference count are
contended.
Note that a "lockref" is absolutely _not_ equivalent to an atomic_t.
Even when the lockref reference counts are updated atomically with
cmpxchg, the fact that they also verify the state of the spinlock means
that the lockless updates can never happen while somebody else holds the
spinlock.
So while "lockref_put_or_lock()" looks a lot like just another name for
"atomic_dec_and_lock()", and both optimize to lockless updates, they are
fundamentally different: the decrement done by atomic_dec_and_lock() is
truly independent of any lock (as long as it doesn't decrement to zero),
so a locked region can still see the count change.
The lockref structure, in contrast, really is a *locked* reference
count. If you hold the spinlock, the reference count will be stable and
you can modify the reference count without using atomics, because even
the lockless updates will see and respect the state of the lock.
In order to enable the cmpxchg lockless code, the architecture needs to
do three things:
(1) Make sure that the "arch_spinlock_t" and an "unsigned int" can fit
in an aligned u64, and have a "cmpxchg()" implementation that works
on such a u64 data type.
(2) define a helper function to test for a spinlock being unlocked
("arch_spin_value_unlocked()")
(3) select the "ARCH_USE_CMPXCHG_LOCKREF" config variable in its
Kconfig file.
This enables it for x86-64 (but not 32-bit, we'd need to make sure
cmpxchg() turns into the proper cmpxchg8b in order to enable it for
32-bit mode).
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2013-09-03 03:12:15 +08:00
|
|
|
CMPXCHG_LOOP(
|
|
|
|
new.count++;
|
|
|
|
,
|
|
|
|
return;
|
|
|
|
);
|
|
|
|
|
2013-09-03 02:58:20 +08:00
|
|
|
spin_lock(&lockref->lock);
|
|
|
|
lockref->count++;
|
|
|
|
spin_unlock(&lockref->lock);
|
|
|
|
}
|
|
|
|
EXPORT_SYMBOL(lockref_get);
|
|
|
|
|
|
|
|
/**
|
|
|
|
* lockref_get_not_zero - Increments count unless the count is 0
|
|
|
|
* @lockcnt: pointer to lockref structure
|
|
|
|
* Return: 1 if count updated successfully or 0 if count was zero
|
|
|
|
*/
|
|
|
|
int lockref_get_not_zero(struct lockref *lockref)
|
|
|
|
{
|
lockref: implement lockless reference count updates using cmpxchg()
Instead of taking the spinlock, the lockless versions atomically check
that the lock is not taken, and do the reference count update using a
cmpxchg() loop. This is semantically identical to doing the reference
count update protected by the lock, but avoids the "wait for lock"
contention that you get when accesses to the reference count are
contended.
Note that a "lockref" is absolutely _not_ equivalent to an atomic_t.
Even when the lockref reference counts are updated atomically with
cmpxchg, the fact that they also verify the state of the spinlock means
that the lockless updates can never happen while somebody else holds the
spinlock.
So while "lockref_put_or_lock()" looks a lot like just another name for
"atomic_dec_and_lock()", and both optimize to lockless updates, they are
fundamentally different: the decrement done by atomic_dec_and_lock() is
truly independent of any lock (as long as it doesn't decrement to zero),
so a locked region can still see the count change.
The lockref structure, in contrast, really is a *locked* reference
count. If you hold the spinlock, the reference count will be stable and
you can modify the reference count without using atomics, because even
the lockless updates will see and respect the state of the lock.
In order to enable the cmpxchg lockless code, the architecture needs to
do three things:
(1) Make sure that the "arch_spinlock_t" and an "unsigned int" can fit
in an aligned u64, and have a "cmpxchg()" implementation that works
on such a u64 data type.
(2) define a helper function to test for a spinlock being unlocked
("arch_spin_value_unlocked()")
(3) select the "ARCH_USE_CMPXCHG_LOCKREF" config variable in its
Kconfig file.
This enables it for x86-64 (but not 32-bit, we'd need to make sure
cmpxchg() turns into the proper cmpxchg8b in order to enable it for
32-bit mode).
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2013-09-03 03:12:15 +08:00
|
|
|
int retval;
|
|
|
|
|
|
|
|
CMPXCHG_LOOP(
|
|
|
|
new.count++;
|
|
|
|
if (!old.count)
|
|
|
|
return 0;
|
|
|
|
,
|
|
|
|
return 1;
|
|
|
|
);
|
2013-09-03 02:58:20 +08:00
|
|
|
|
|
|
|
spin_lock(&lockref->lock);
|
lockref: implement lockless reference count updates using cmpxchg()
Instead of taking the spinlock, the lockless versions atomically check
that the lock is not taken, and do the reference count update using a
cmpxchg() loop. This is semantically identical to doing the reference
count update protected by the lock, but avoids the "wait for lock"
contention that you get when accesses to the reference count are
contended.
Note that a "lockref" is absolutely _not_ equivalent to an atomic_t.
Even when the lockref reference counts are updated atomically with
cmpxchg, the fact that they also verify the state of the spinlock means
that the lockless updates can never happen while somebody else holds the
spinlock.
So while "lockref_put_or_lock()" looks a lot like just another name for
"atomic_dec_and_lock()", and both optimize to lockless updates, they are
fundamentally different: the decrement done by atomic_dec_and_lock() is
truly independent of any lock (as long as it doesn't decrement to zero),
so a locked region can still see the count change.
The lockref structure, in contrast, really is a *locked* reference
count. If you hold the spinlock, the reference count will be stable and
you can modify the reference count without using atomics, because even
the lockless updates will see and respect the state of the lock.
In order to enable the cmpxchg lockless code, the architecture needs to
do three things:
(1) Make sure that the "arch_spinlock_t" and an "unsigned int" can fit
in an aligned u64, and have a "cmpxchg()" implementation that works
on such a u64 data type.
(2) define a helper function to test for a spinlock being unlocked
("arch_spin_value_unlocked()")
(3) select the "ARCH_USE_CMPXCHG_LOCKREF" config variable in its
Kconfig file.
This enables it for x86-64 (but not 32-bit, we'd need to make sure
cmpxchg() turns into the proper cmpxchg8b in order to enable it for
32-bit mode).
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2013-09-03 03:12:15 +08:00
|
|
|
retval = 0;
|
2013-09-03 02:58:20 +08:00
|
|
|
if (lockref->count) {
|
|
|
|
lockref->count++;
|
|
|
|
retval = 1;
|
|
|
|
}
|
|
|
|
spin_unlock(&lockref->lock);
|
|
|
|
return retval;
|
|
|
|
}
|
|
|
|
EXPORT_SYMBOL(lockref_get_not_zero);
|
|
|
|
|
|
|
|
/**
|
|
|
|
* lockref_get_or_lock - Increments count unless the count is 0
|
|
|
|
* @lockcnt: pointer to lockref structure
|
|
|
|
* Return: 1 if count updated successfully or 0 if count was zero
|
|
|
|
* and we got the lock instead.
|
|
|
|
*/
|
|
|
|
int lockref_get_or_lock(struct lockref *lockref)
|
|
|
|
{
|
lockref: implement lockless reference count updates using cmpxchg()
Instead of taking the spinlock, the lockless versions atomically check
that the lock is not taken, and do the reference count update using a
cmpxchg() loop. This is semantically identical to doing the reference
count update protected by the lock, but avoids the "wait for lock"
contention that you get when accesses to the reference count are
contended.
Note that a "lockref" is absolutely _not_ equivalent to an atomic_t.
Even when the lockref reference counts are updated atomically with
cmpxchg, the fact that they also verify the state of the spinlock means
that the lockless updates can never happen while somebody else holds the
spinlock.
So while "lockref_put_or_lock()" looks a lot like just another name for
"atomic_dec_and_lock()", and both optimize to lockless updates, they are
fundamentally different: the decrement done by atomic_dec_and_lock() is
truly independent of any lock (as long as it doesn't decrement to zero),
so a locked region can still see the count change.
The lockref structure, in contrast, really is a *locked* reference
count. If you hold the spinlock, the reference count will be stable and
you can modify the reference count without using atomics, because even
the lockless updates will see and respect the state of the lock.
In order to enable the cmpxchg lockless code, the architecture needs to
do three things:
(1) Make sure that the "arch_spinlock_t" and an "unsigned int" can fit
in an aligned u64, and have a "cmpxchg()" implementation that works
on such a u64 data type.
(2) define a helper function to test for a spinlock being unlocked
("arch_spin_value_unlocked()")
(3) select the "ARCH_USE_CMPXCHG_LOCKREF" config variable in its
Kconfig file.
This enables it for x86-64 (but not 32-bit, we'd need to make sure
cmpxchg() turns into the proper cmpxchg8b in order to enable it for
32-bit mode).
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2013-09-03 03:12:15 +08:00
|
|
|
CMPXCHG_LOOP(
|
|
|
|
new.count++;
|
|
|
|
if (!old.count)
|
|
|
|
break;
|
|
|
|
,
|
|
|
|
return 1;
|
|
|
|
);
|
|
|
|
|
2013-09-03 02:58:20 +08:00
|
|
|
spin_lock(&lockref->lock);
|
|
|
|
if (!lockref->count)
|
|
|
|
return 0;
|
|
|
|
lockref->count++;
|
|
|
|
spin_unlock(&lockref->lock);
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
EXPORT_SYMBOL(lockref_get_or_lock);
|
|
|
|
|
|
|
|
/**
|
|
|
|
* lockref_put_or_lock - decrements count unless count <= 1 before decrement
|
|
|
|
* @lockcnt: pointer to lockref structure
|
|
|
|
* Return: 1 if count updated successfully or 0 if count <= 1 and lock taken
|
|
|
|
*/
|
|
|
|
int lockref_put_or_lock(struct lockref *lockref)
|
|
|
|
{
|
lockref: implement lockless reference count updates using cmpxchg()
Instead of taking the spinlock, the lockless versions atomically check
that the lock is not taken, and do the reference count update using a
cmpxchg() loop. This is semantically identical to doing the reference
count update protected by the lock, but avoids the "wait for lock"
contention that you get when accesses to the reference count are
contended.
Note that a "lockref" is absolutely _not_ equivalent to an atomic_t.
Even when the lockref reference counts are updated atomically with
cmpxchg, the fact that they also verify the state of the spinlock means
that the lockless updates can never happen while somebody else holds the
spinlock.
So while "lockref_put_or_lock()" looks a lot like just another name for
"atomic_dec_and_lock()", and both optimize to lockless updates, they are
fundamentally different: the decrement done by atomic_dec_and_lock() is
truly independent of any lock (as long as it doesn't decrement to zero),
so a locked region can still see the count change.
The lockref structure, in contrast, really is a *locked* reference
count. If you hold the spinlock, the reference count will be stable and
you can modify the reference count without using atomics, because even
the lockless updates will see and respect the state of the lock.
In order to enable the cmpxchg lockless code, the architecture needs to
do three things:
(1) Make sure that the "arch_spinlock_t" and an "unsigned int" can fit
in an aligned u64, and have a "cmpxchg()" implementation that works
on such a u64 data type.
(2) define a helper function to test for a spinlock being unlocked
("arch_spin_value_unlocked()")
(3) select the "ARCH_USE_CMPXCHG_LOCKREF" config variable in its
Kconfig file.
This enables it for x86-64 (but not 32-bit, we'd need to make sure
cmpxchg() turns into the proper cmpxchg8b in order to enable it for
32-bit mode).
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2013-09-03 03:12:15 +08:00
|
|
|
CMPXCHG_LOOP(
|
|
|
|
new.count--;
|
|
|
|
if (old.count <= 1)
|
|
|
|
break;
|
|
|
|
,
|
|
|
|
return 1;
|
|
|
|
);
|
|
|
|
|
2013-09-03 02:58:20 +08:00
|
|
|
spin_lock(&lockref->lock);
|
|
|
|
if (lockref->count <= 1)
|
|
|
|
return 0;
|
|
|
|
lockref->count--;
|
|
|
|
spin_unlock(&lockref->lock);
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
EXPORT_SYMBOL(lockref_put_or_lock);
|