fasync: RCU and fine grained locking
kill_fasync() uses a central rwlock, which is a candidate for RCU conversion to avoid cache line ping-pongs on SMP. fasync_remove_entry() and fasync_add_entry() can disable IRQs for a short section instead of during the whole list scan. Use a spinlock per fasync_struct to synchronize kill_fasync_rcu() and fasync_{remove|add}_entry(). This spinlock is IRQ safe, so sock_fasync() doesn't need its own implementation and can use fasync_helper(), to reduce code size and complexity. We can remove the direct use of __kill_fasync() in net/socket.c, and rename it to kill_fasync_rcu(). Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com> Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com> Cc: Lai Jiangshan <laijs@cn.fujitsu.com> Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
parent
e5700aff14
commit
989a297920
66
fs/fcntl.c
66
fs/fcntl.c
|
@ -614,9 +614,15 @@ int send_sigurg(struct fown_struct *fown)
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
static DEFINE_RWLOCK(fasync_lock);
|
static DEFINE_SPINLOCK(fasync_lock);
|
||||||
static struct kmem_cache *fasync_cache __read_mostly;
|
static struct kmem_cache *fasync_cache __read_mostly;
|
||||||
|
|
||||||
|
static void fasync_free_rcu(struct rcu_head *head)
|
||||||
|
{
|
||||||
|
kmem_cache_free(fasync_cache,
|
||||||
|
container_of(head, struct fasync_struct, fa_rcu));
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Remove a fasync entry. If successfully removed, return
|
* Remove a fasync entry. If successfully removed, return
|
||||||
* positive and clear the FASYNC flag. If no entry exists,
|
* positive and clear the FASYNC flag. If no entry exists,
|
||||||
|
@ -625,8 +631,6 @@ static struct kmem_cache *fasync_cache __read_mostly;
|
||||||
* NOTE! It is very important that the FASYNC flag always
|
* NOTE! It is very important that the FASYNC flag always
|
||||||
* match the state "is the filp on a fasync list".
|
* match the state "is the filp on a fasync list".
|
||||||
*
|
*
|
||||||
* We always take the 'filp->f_lock', in since fasync_lock
|
|
||||||
* needs to be irq-safe.
|
|
||||||
*/
|
*/
|
||||||
static int fasync_remove_entry(struct file *filp, struct fasync_struct **fapp)
|
static int fasync_remove_entry(struct file *filp, struct fasync_struct **fapp)
|
||||||
{
|
{
|
||||||
|
@ -634,17 +638,22 @@ static int fasync_remove_entry(struct file *filp, struct fasync_struct **fapp)
|
||||||
int result = 0;
|
int result = 0;
|
||||||
|
|
||||||
spin_lock(&filp->f_lock);
|
spin_lock(&filp->f_lock);
|
||||||
write_lock_irq(&fasync_lock);
|
spin_lock(&fasync_lock);
|
||||||
for (fp = fapp; (fa = *fp) != NULL; fp = &fa->fa_next) {
|
for (fp = fapp; (fa = *fp) != NULL; fp = &fa->fa_next) {
|
||||||
if (fa->fa_file != filp)
|
if (fa->fa_file != filp)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
|
spin_lock_irq(&fa->fa_lock);
|
||||||
|
fa->fa_file = NULL;
|
||||||
|
spin_unlock_irq(&fa->fa_lock);
|
||||||
|
|
||||||
*fp = fa->fa_next;
|
*fp = fa->fa_next;
|
||||||
kmem_cache_free(fasync_cache, fa);
|
call_rcu(&fa->fa_rcu, fasync_free_rcu);
|
||||||
filp->f_flags &= ~FASYNC;
|
filp->f_flags &= ~FASYNC;
|
||||||
result = 1;
|
result = 1;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
write_unlock_irq(&fasync_lock);
|
spin_unlock(&fasync_lock);
|
||||||
spin_unlock(&filp->f_lock);
|
spin_unlock(&filp->f_lock);
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
@ -666,25 +675,30 @@ static int fasync_add_entry(int fd, struct file *filp, struct fasync_struct **fa
|
||||||
return -ENOMEM;
|
return -ENOMEM;
|
||||||
|
|
||||||
spin_lock(&filp->f_lock);
|
spin_lock(&filp->f_lock);
|
||||||
write_lock_irq(&fasync_lock);
|
spin_lock(&fasync_lock);
|
||||||
for (fp = fapp; (fa = *fp) != NULL; fp = &fa->fa_next) {
|
for (fp = fapp; (fa = *fp) != NULL; fp = &fa->fa_next) {
|
||||||
if (fa->fa_file != filp)
|
if (fa->fa_file != filp)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
|
spin_lock_irq(&fa->fa_lock);
|
||||||
fa->fa_fd = fd;
|
fa->fa_fd = fd;
|
||||||
|
spin_unlock_irq(&fa->fa_lock);
|
||||||
|
|
||||||
kmem_cache_free(fasync_cache, new);
|
kmem_cache_free(fasync_cache, new);
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
spin_lock_init(&new->fa_lock);
|
||||||
new->magic = FASYNC_MAGIC;
|
new->magic = FASYNC_MAGIC;
|
||||||
new->fa_file = filp;
|
new->fa_file = filp;
|
||||||
new->fa_fd = fd;
|
new->fa_fd = fd;
|
||||||
new->fa_next = *fapp;
|
new->fa_next = *fapp;
|
||||||
*fapp = new;
|
rcu_assign_pointer(*fapp, new);
|
||||||
result = 1;
|
result = 1;
|
||||||
filp->f_flags |= FASYNC;
|
filp->f_flags |= FASYNC;
|
||||||
|
|
||||||
out:
|
out:
|
||||||
write_unlock_irq(&fasync_lock);
|
spin_unlock(&fasync_lock);
|
||||||
spin_unlock(&filp->f_lock);
|
spin_unlock(&filp->f_lock);
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
@ -704,37 +718,41 @@ int fasync_helper(int fd, struct file * filp, int on, struct fasync_struct **fap
|
||||||
|
|
||||||
EXPORT_SYMBOL(fasync_helper);
|
EXPORT_SYMBOL(fasync_helper);
|
||||||
|
|
||||||
void __kill_fasync(struct fasync_struct *fa, int sig, int band)
|
/*
|
||||||
|
* rcu_read_lock() is held
|
||||||
|
*/
|
||||||
|
static void kill_fasync_rcu(struct fasync_struct *fa, int sig, int band)
|
||||||
{
|
{
|
||||||
while (fa) {
|
while (fa) {
|
||||||
struct fown_struct * fown;
|
struct fown_struct *fown;
|
||||||
if (fa->magic != FASYNC_MAGIC) {
|
if (fa->magic != FASYNC_MAGIC) {
|
||||||
printk(KERN_ERR "kill_fasync: bad magic number in "
|
printk(KERN_ERR "kill_fasync: bad magic number in "
|
||||||
"fasync_struct!\n");
|
"fasync_struct!\n");
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
fown = &fa->fa_file->f_owner;
|
spin_lock(&fa->fa_lock);
|
||||||
/* Don't send SIGURG to processes which have not set a
|
if (fa->fa_file) {
|
||||||
queued signum: SIGURG has its own default signalling
|
fown = &fa->fa_file->f_owner;
|
||||||
mechanism. */
|
/* Don't send SIGURG to processes which have not set a
|
||||||
if (!(sig == SIGURG && fown->signum == 0))
|
queued signum: SIGURG has its own default signalling
|
||||||
send_sigio(fown, fa->fa_fd, band);
|
mechanism. */
|
||||||
fa = fa->fa_next;
|
if (!(sig == SIGURG && fown->signum == 0))
|
||||||
|
send_sigio(fown, fa->fa_fd, band);
|
||||||
|
}
|
||||||
|
spin_unlock(&fa->fa_lock);
|
||||||
|
fa = rcu_dereference(fa->fa_next);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
EXPORT_SYMBOL(__kill_fasync);
|
|
||||||
|
|
||||||
void kill_fasync(struct fasync_struct **fp, int sig, int band)
|
void kill_fasync(struct fasync_struct **fp, int sig, int band)
|
||||||
{
|
{
|
||||||
/* First a quick test without locking: usually
|
/* First a quick test without locking: usually
|
||||||
* the list is empty.
|
* the list is empty.
|
||||||
*/
|
*/
|
||||||
if (*fp) {
|
if (*fp) {
|
||||||
read_lock(&fasync_lock);
|
rcu_read_lock();
|
||||||
/* reread *fp after obtaining the lock */
|
kill_fasync_rcu(rcu_dereference(*fp), sig, band);
|
||||||
__kill_fasync(*fp, sig, band);
|
rcu_read_unlock();
|
||||||
read_unlock(&fasync_lock);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL(kill_fasync);
|
EXPORT_SYMBOL(kill_fasync);
|
||||||
|
|
|
@ -1280,10 +1280,12 @@ static inline int lock_may_write(struct inode *inode, loff_t start,
|
||||||
|
|
||||||
|
|
||||||
struct fasync_struct {
|
struct fasync_struct {
|
||||||
int magic;
|
spinlock_t fa_lock;
|
||||||
int fa_fd;
|
int magic;
|
||||||
struct fasync_struct *fa_next; /* singly linked list */
|
int fa_fd;
|
||||||
struct file *fa_file;
|
struct fasync_struct *fa_next; /* singly linked list */
|
||||||
|
struct file *fa_file;
|
||||||
|
struct rcu_head fa_rcu;
|
||||||
};
|
};
|
||||||
|
|
||||||
#define FASYNC_MAGIC 0x4601
|
#define FASYNC_MAGIC 0x4601
|
||||||
|
@ -1292,8 +1294,6 @@ struct fasync_struct {
|
||||||
extern int fasync_helper(int, struct file *, int, struct fasync_struct **);
|
extern int fasync_helper(int, struct file *, int, struct fasync_struct **);
|
||||||
/* can be called from interrupts */
|
/* can be called from interrupts */
|
||||||
extern void kill_fasync(struct fasync_struct **, int, int);
|
extern void kill_fasync(struct fasync_struct **, int, int);
|
||||||
/* only for net: no internal synchronization */
|
|
||||||
extern void __kill_fasync(struct fasync_struct *, int, int);
|
|
||||||
|
|
||||||
extern int __f_setown(struct file *filp, struct pid *, enum pid_type, int force);
|
extern int __f_setown(struct file *filp, struct pid *, enum pid_type, int force);
|
||||||
extern int f_setown(struct file *filp, unsigned long arg, int force);
|
extern int f_setown(struct file *filp, unsigned long arg, int force);
|
||||||
|
|
73
net/socket.c
73
net/socket.c
|
@ -1067,78 +1067,27 @@ static int sock_close(struct inode *inode, struct file *filp)
|
||||||
* 1. fasync_list is modified only under process context socket lock
|
* 1. fasync_list is modified only under process context socket lock
|
||||||
* i.e. under semaphore.
|
* i.e. under semaphore.
|
||||||
* 2. fasync_list is used under read_lock(&sk->sk_callback_lock)
|
* 2. fasync_list is used under read_lock(&sk->sk_callback_lock)
|
||||||
* or under socket lock.
|
* or under socket lock
|
||||||
* 3. fasync_list can be used from softirq context, so that
|
|
||||||
* modification under socket lock have to be enhanced with
|
|
||||||
* write_lock_bh(&sk->sk_callback_lock).
|
|
||||||
* --ANK (990710)
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
static int sock_fasync(int fd, struct file *filp, int on)
|
static int sock_fasync(int fd, struct file *filp, int on)
|
||||||
{
|
{
|
||||||
struct fasync_struct *fa, *fna = NULL, **prev;
|
struct socket *sock = filp->private_data;
|
||||||
struct socket *sock;
|
struct sock *sk = sock->sk;
|
||||||
struct sock *sk;
|
|
||||||
|
|
||||||
if (on) {
|
if (sk == NULL)
|
||||||
fna = kmalloc(sizeof(struct fasync_struct), GFP_KERNEL);
|
|
||||||
if (fna == NULL)
|
|
||||||
return -ENOMEM;
|
|
||||||
}
|
|
||||||
|
|
||||||
sock = filp->private_data;
|
|
||||||
|
|
||||||
sk = sock->sk;
|
|
||||||
if (sk == NULL) {
|
|
||||||
kfree(fna);
|
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
}
|
|
||||||
|
|
||||||
lock_sock(sk);
|
lock_sock(sk);
|
||||||
|
|
||||||
spin_lock(&filp->f_lock);
|
fasync_helper(fd, filp, on, &sock->fasync_list);
|
||||||
if (on)
|
|
||||||
filp->f_flags |= FASYNC;
|
if (!sock->fasync_list)
|
||||||
|
sock_reset_flag(sk, SOCK_FASYNC);
|
||||||
else
|
else
|
||||||
filp->f_flags &= ~FASYNC;
|
|
||||||
spin_unlock(&filp->f_lock);
|
|
||||||
|
|
||||||
prev = &(sock->fasync_list);
|
|
||||||
|
|
||||||
for (fa = *prev; fa != NULL; prev = &fa->fa_next, fa = *prev)
|
|
||||||
if (fa->fa_file == filp)
|
|
||||||
break;
|
|
||||||
|
|
||||||
if (on) {
|
|
||||||
if (fa != NULL) {
|
|
||||||
write_lock_bh(&sk->sk_callback_lock);
|
|
||||||
fa->fa_fd = fd;
|
|
||||||
write_unlock_bh(&sk->sk_callback_lock);
|
|
||||||
|
|
||||||
kfree(fna);
|
|
||||||
goto out;
|
|
||||||
}
|
|
||||||
fna->fa_file = filp;
|
|
||||||
fna->fa_fd = fd;
|
|
||||||
fna->magic = FASYNC_MAGIC;
|
|
||||||
fna->fa_next = sock->fasync_list;
|
|
||||||
write_lock_bh(&sk->sk_callback_lock);
|
|
||||||
sock->fasync_list = fna;
|
|
||||||
sock_set_flag(sk, SOCK_FASYNC);
|
sock_set_flag(sk, SOCK_FASYNC);
|
||||||
write_unlock_bh(&sk->sk_callback_lock);
|
|
||||||
} else {
|
|
||||||
if (fa != NULL) {
|
|
||||||
write_lock_bh(&sk->sk_callback_lock);
|
|
||||||
*prev = fa->fa_next;
|
|
||||||
if (!sock->fasync_list)
|
|
||||||
sock_reset_flag(sk, SOCK_FASYNC);
|
|
||||||
write_unlock_bh(&sk->sk_callback_lock);
|
|
||||||
kfree(fa);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
out:
|
release_sock(sk);
|
||||||
release_sock(sock->sk);
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1159,10 +1108,10 @@ int sock_wake_async(struct socket *sock, int how, int band)
|
||||||
/* fall through */
|
/* fall through */
|
||||||
case SOCK_WAKE_IO:
|
case SOCK_WAKE_IO:
|
||||||
call_kill:
|
call_kill:
|
||||||
__kill_fasync(sock->fasync_list, SIGIO, band);
|
kill_fasync(&sock->fasync_list, SIGIO, band);
|
||||||
break;
|
break;
|
||||||
case SOCK_WAKE_URG:
|
case SOCK_WAKE_URG:
|
||||||
__kill_fasync(sock->fasync_list, SIGURG, band);
|
kill_fasync(&sock->fasync_list, SIGURG, band);
|
||||||
}
|
}
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue