From 26a4a06e7ff2874154eb3f4b4ba0514dc563b100 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Sun, 9 Sep 2007 08:39:27 +0200 Subject: [PATCH 01/14] [Bluetooth] Add missing stat.byte_rx counter modification With the support for hci_recv_fragment() the call to increase the stat.byte_rx counter got accidentally removed. This patch fixes it. Signed-off-by: Marcel Holtmann --- drivers/bluetooth/hci_usb.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/bluetooth/hci_usb.c b/drivers/bluetooth/hci_usb.c index 59b054810ed0..98a9cdeaffb6 100644 --- a/drivers/bluetooth/hci_usb.c +++ b/drivers/bluetooth/hci_usb.c @@ -691,15 +691,18 @@ static void hci_usb_rx_complete(struct urb *urb) urb->iso_frame_desc[i].offset, urb->iso_frame_desc[i].actual_length); - if (!urb->iso_frame_desc[i].status) + if (!urb->iso_frame_desc[i].status) { + husb->hdev->stat.byte_rx += urb->iso_frame_desc[i].actual_length; hci_recv_fragment(husb->hdev, _urb->type, urb->transfer_buffer + urb->iso_frame_desc[i].offset, urb->iso_frame_desc[i].actual_length); + } } #else ; #endif } else { + husb->hdev->stat.byte_rx += count; err = hci_recv_fragment(husb->hdev, _urb->type, urb->transfer_buffer, count); if (err < 0) { BT_ERR("%s corrupted packet: type %d count %d", From 767c5eb5d35aeb85987143f0a730bc21d3ecfb3d Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Sun, 9 Sep 2007 08:39:34 +0200 Subject: [PATCH 02/14] [Bluetooth] Add compat handling for timestamp structure The timestamp structure needs special handling in case of compat programs. Use the same wrapping method the network core uses. Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_sock.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 1dae3dfc66a9..68198a788969 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -37,6 +37,7 @@ #include #include #include +#include #include #include #include @@ -342,9 +343,23 @@ static inline void hci_sock_cmsg(struct sock *sk, struct msghdr *msg, struct sk_ if (mask & HCI_CMSG_TSTAMP) { struct timeval tv; + void *data; + int len; skb_get_timestamp(skb, &tv); - put_cmsg(msg, SOL_HCI, HCI_CMSG_TSTAMP, sizeof(tv), &tv); + + if (msg->msg_flags & MSG_CMSG_COMPAT) { + struct compat_timeval ctv; + ctv.tv_sec = tv.tv_sec; + ctv.tv_usec = tv.tv_usec; + data = &ctv; + len = sizeof(ctv); + } else { + data = &tv; + len = sizeof(tv); + } + + put_cmsg(msg, SOL_HCI, HCI_CMSG_TSTAMP, len, data); } } From 7c631a67601f116d303cfb98a3d964a150090e38 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Sun, 9 Sep 2007 08:39:43 +0200 Subject: [PATCH 03/14] [Bluetooth] Update security filter for Bluetooth 2.1 This patch updates the HCI security filter with support for the Bluetooth 2.1 commands and events. Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_sock.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 68198a788969..d16ca8e53700 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -71,15 +71,15 @@ static struct hci_sec_filter hci_sec_filter = { { { 0x0 }, /* OGF_LINK_CTL */ - { 0xbe000006, 0x00000001, 0x000000, 0x00 }, + { 0xbe000006, 0x00000001, 0x00000000, 0x00 }, /* OGF_LINK_POLICY */ - { 0x00005200, 0x00000000, 0x000000, 0x00 }, + { 0x00005200, 0x00000000, 0x00000000, 0x00 }, /* OGF_HOST_CTL */ - { 0xaab00200, 0x2b402aaa, 0x020154, 0x00 }, + { 0xaab00200, 0x2b402aaa, 0x05220154, 0x00 }, /* OGF_INFO_PARAM */ - { 0x000002be, 0x00000000, 0x000000, 0x00 }, + { 0x000002be, 0x00000000, 0x00000000, 0x00 }, /* OGF_STATUS_PARAM */ - { 0x000000ea, 0x00000000, 0x000000, 0x00 } + { 0x000000ea, 0x00000000, 0x00000000, 0x00 } } }; From 89f2783ded0a4fc98852cb9552bb27a80cd6a41a Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Sun, 9 Sep 2007 08:39:49 +0200 Subject: [PATCH 04/14] [Bluetooth] Fix parameter list for event filter command On device initialization the event filters are cleared. In case of clearing the filters the extra condition type shall be omitted. Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_core.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 63caa414945d..18e3afc964df 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -183,6 +183,7 @@ static void hci_init_req(struct hci_dev *hdev, unsigned long opt) { struct sk_buff *skb; __le16 param; + __u8 flt_type; BT_DBG("%s %ld", hdev->name, opt); @@ -233,11 +234,8 @@ static void hci_init_req(struct hci_dev *hdev, unsigned long opt) /* Optional initialization */ /* Clear Event Filters */ - { - struct hci_cp_set_event_flt cp; - cp.flt_type = HCI_FLT_CLEAR_ALL; - hci_send_cmd(hdev, OGF_HOST_CTL, OCF_SET_EVENT_FLT, sizeof(cp), &cp); - } + flt_type = HCI_FLT_CLEAR_ALL; + hci_send_cmd(hdev, OGF_HOST_CTL, OCF_SET_EVENT_FLT, 1, &flt_type); /* Page timeout ~20 secs */ param = cpu_to_le16(0x8000); From 2fbe43f6f631dd7ce19fb1499d6164a5bdb34568 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Thu, 6 Sep 2007 12:04:29 +0100 Subject: [PATCH 05/14] [TG3]: Workaround MSI bug on 5714/5780. A hardware bug was revealed after a recent PCI MSI patch was made to always disable legacy INTX when enabling MSI. The 5714/5780 chips will not generate MSI when INTX is disabled, causing MSI failure messages to be reported, and another patch was made to workaround the problem by disabling MSI on ServerWorks HT1000 bridge chips commonly found with the 5714. We workaround this chip bug by enabling INTX after we enable MSI and after we resume from suspend. Update version to 3.81. This problem was discovered by David Miller. Signed-off-by: Michael Chan Acked-by: Andy Gospodarek Signed-off-by: David S. Miller --- drivers/net/tg3.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c index 58740428dd07..9034a05734ef 100644 --- a/drivers/net/tg3.c +++ b/drivers/net/tg3.c @@ -64,8 +64,8 @@ #define DRV_MODULE_NAME "tg3" #define PFX DRV_MODULE_NAME ": " -#define DRV_MODULE_VERSION "3.80" -#define DRV_MODULE_RELDATE "August 2, 2007" +#define DRV_MODULE_VERSION "3.81" +#define DRV_MODULE_RELDATE "September 5, 2007" #define TG3_DEF_MAC_MODE 0 #define TG3_DEF_RX_MODE 0 @@ -7127,6 +7127,10 @@ static int tg3_open(struct net_device *dev) } else if (pci_enable_msi(tp->pdev) == 0) { u32 msi_mode; + /* Hardware bug - MSI won't work if INTX disabled. */ + if (tp->tg3_flags2 & TG3_FLG2_5780_CLASS) + pci_intx(tp->pdev, 1); + msi_mode = tr32(MSGINT_MODE); tw32(MSGINT_MODE, msi_mode | MSGINT_MODE_ENABLE); tp->tg3_flags2 |= TG3_FLG2_USING_MSI; @@ -12172,6 +12176,11 @@ static int tg3_resume(struct pci_dev *pdev) if (err) return err; + /* Hardware bug - MSI won't work if INTX disabled. */ + if ((tp->tg3_flags2 & TG3_FLG2_5780_CLASS) && + (tp->tg3_flags2 & TG3_FLG2_USING_MSI)) + pci_intx(tp->pdev, 1); + netif_device_attach(dev); tg3_full_lock(tp, 0); From ef8aef55ce61fd0e2af798695f7386ac756ae1e7 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 6 Sep 2007 14:06:35 +0100 Subject: [PATCH 06/14] [NET]: Do not dereference iov if length is zero When msg_iovlen is zero we shouldn't try to dereference msg_iov. Right now the only thing that tries to do so is skb_copy_and_csum_datagram_iovec. Since the total length should also be zero if msg_iovlen is zero, it's sufficient to check the total length there and simply return if it's zero. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/core/datagram.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/core/datagram.c b/net/core/datagram.c index cb056f476126..029b93e246b4 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -450,6 +450,9 @@ int skb_copy_and_csum_datagram_iovec(struct sk_buff *skb, __wsum csum; int chunk = skb->len - hlen; + if (!chunk) + return 0; + /* Skip filled elements. * Pretty silly, look at memcpy_toiovec, though 8) */ From 596e41509550447b030f7b16adaeb0138ab585a8 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Tue, 11 Sep 2007 10:41:04 +0200 Subject: [PATCH 07/14] [IPV4] devinet: show all addresses assigned to interface Bug: http://bugzilla.kernel.org/show_bug.cgi?id=8876 Not all ips are shown by "ip addr show" command when IPs number assigned to an interface is more than 60-80 (in fact it depends on broadcast/label etc presence on each address). Steps to reproduce: It's terribly simple to reproduce: # for i in $(seq 1 100); do ip ad add 10.0.$i.1/24 dev eth10 ; done # ip addr show this will _not_ show all IPs. Looks like the problem is in netlink/ipv4 message processing. This is fix from bug submitter, it looks correct. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/ipv4/devinet.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 5b77bdaa57dd..5dbe5803b7d5 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1193,7 +1193,7 @@ static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) for (ifa = in_dev->ifa_list, ip_idx = 0; ifa; ifa = ifa->ifa_next, ip_idx++) { if (ip_idx < s_ip_idx) - goto cont; + continue; if (inet_fill_ifaddr(skb, ifa, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, RTM_NEWADDR, NLM_F_MULTI) <= 0) From a2221f308dabb95abb914ad858d36c2462705558 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 11 Sep 2007 10:45:15 +0200 Subject: [PATCH 08/14] [DECNET]: Fix interface address listing regression. Not all are listed, same as the IPV4 devinet bug. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/decnet/dn_dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c index fa6604fcf0e7..8def68209edd 100644 --- a/net/decnet/dn_dev.c +++ b/net/decnet/dn_dev.c @@ -814,7 +814,7 @@ static int dn_nl_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) for (ifa = dn_db->ifa_list, dn_idx = 0; ifa; ifa = ifa->ifa_next, dn_idx++) { if (dn_idx < skip_naddr) - goto cont; + continue; if (dn_nl_fill_ifaddr(skb, ifa, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, RTM_NEWADDR, From 9e3be4b34364a670bd6e57d2e8c3caabdd8d89f8 Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Tue, 11 Sep 2007 11:04:49 +0200 Subject: [PATCH 09/14] [IPV6]: Freeing alive inet6 address From: Denis V. Lunev addrconf_dad_failure calls addrconf_dad_stop which takes referenced address and drops the count. So, in6_ifa_put perrformed at out: is extra. This results in message: "Freeing alive inet6 address" and not released dst entries. Signed-off-by: Denis V. Lunev Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- net/ipv6/ndisc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 0358e6066a4e..73a894a2152c 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -736,7 +736,7 @@ static void ndisc_recv_ns(struct sk_buff *skb) * so fail our DAD process */ addrconf_dad_failure(ifp); - goto out; + return; } else { /* * This is not a dad solicitation. From fdd8a532a6764393305ae7063a8994d71404c482 Mon Sep 17 00:00:00 2001 From: Peter P Waskiewicz Jr Date: Tue, 11 Sep 2007 11:12:06 +0200 Subject: [PATCH 10/14] [NET] DOC: Update networking/multiqueue.txt with correct information. Updated the multiqueue.txt document to call out the correct kernel options to select to enable multiqueue. Signed-off-by: Peter P Waskiewicz Jr Signed-off-by: David S. Miller --- Documentation/networking/multiqueue.txt | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/Documentation/networking/multiqueue.txt b/Documentation/networking/multiqueue.txt index 00b60cce2224..ea5a42e8f79f 100644 --- a/Documentation/networking/multiqueue.txt +++ b/Documentation/networking/multiqueue.txt @@ -58,9 +58,13 @@ software, so it's a straight round-robin qdisc. It uses the same syntax and classification priomap that sch_prio uses, so it should be intuitive to configure for people who've used sch_prio. -The PRIO qdisc naturally plugs into a multiqueue device. If PRIO has been -built with NET_SCH_PRIO_MQ, then upon load, it will make sure the number of -bands requested is equal to the number of queues on the hardware. If they +In order to utilitize the multiqueue features of the qdiscs, the network +device layer needs to enable multiple queue support. This can be done by +selecting NETDEVICES_MULTIQUEUE under Drivers. + +The PRIO qdisc naturally plugs into a multiqueue device. If +NETDEVICES_MULTIQUEUE is selected, then on qdisc load, the number of +bands requested is compared to the number of queues on the hardware. If they are equal, it sets a one-to-one mapping up between the queues and bands. If they're not equal, it will not load the qdisc. This is the same behavior for RR. Once the association is made, any skb that is classified will have From 0fb96701376874c9f1f80322f89a5bf4457c709f Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 11 Sep 2007 11:27:01 +0200 Subject: [PATCH 11/14] [NETFILTER]: nf_conntrack_ipv4: fix "Frag of proto ..." messages Since we're now using a generic tuple decoding function in ICMP connection tracking, ipv4_get_l4proto() might get called with a fragmented packet from within an ICMP error. Remove the error message we used to print when this happens. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index d9b5177989c6..53cb1772f38f 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -87,14 +87,10 @@ static int ipv4_get_l4proto(const struct sk_buff *skb, unsigned int nhoff, if (iph == NULL) return -NF_DROP; - /* Never happen */ - if (iph->frag_off & htons(IP_OFFSET)) { - if (net_ratelimit()) { - printk(KERN_ERR "ipv4_get_l4proto: Frag of proto %u\n", - iph->protocol); - } + /* Conntrack defragments packets, we might still see fragments + * inside ICMP packets though. */ + if (iph->frag_off & htons(IP_OFFSET)) return -NF_DROP; - } *dataoff = nhoff + (iph->ihl << 2); *protonum = iph->protocol; From 16fcec35e7d7c4faaa4709f6434a4a25b06d25e3 Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Tue, 11 Sep 2007 11:28:26 +0200 Subject: [PATCH 12/14] [NETFILTER]: Fix/improve deadlock condition on module removal netfilter So I've had a deadlock reported to me. I've found that the sequence of events goes like this: 1) process A (modprobe) runs to remove ip_tables.ko 2) process B (iptables-restore) runs and calls setsockopt on a netfilter socket, increasing the ip_tables socket_ops use count 3) process A acquires a file lock on the file ip_tables.ko, calls remove_module in the kernel, which in turn executes the ip_tables module cleanup routine, which calls nf_unregister_sockopt 4) nf_unregister_sockopt, seeing that the use count is non-zero, puts the calling process into uninterruptible sleep, expecting the process using the socket option code to wake it up when it exits the kernel 4) the user of the socket option code (process B) in do_ipt_get_ctl, calls ipt_find_table_lock, which in this case calls request_module to load ip_tables_nat.ko 5) request_module forks a copy of modprobe (process C) to load the module and blocks until modprobe exits. 6) Process C. forked by request_module process the dependencies of ip_tables_nat.ko, of which ip_tables.ko is one. 7) Process C attempts to lock the request module and all its dependencies, it blocks when it attempts to lock ip_tables.ko (which was previously locked in step 3) Theres not really any great permanent solution to this that I can see, but I've developed a two part solution that corrects the problem Part 1) Modifies the nf_sockopt registration code so that, instead of using a use counter internal to the nf_sockopt_ops structure, we instead use a pointer to the registering modules owner to do module reference counting when nf_sockopt calls a modules set/get routine. This prevents the deadlock by preventing set 4 from happening. Part 2) Enhances the modprobe utilty so that by default it preforms non-blocking remove operations (the same way rmmod does), and add an option to explicity request blocking operation. So if you select blocking operation in modprobe you can still cause the above deadlock, but only if you explicity try (and since root can do any old stupid thing it would like.... :) ). Signed-off-by: Neil Horman Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netfilter.h | 5 ++- net/bridge/netfilter/ebtables.c | 1 + net/ipv4/ipvs/ip_vs_ctl.c | 1 + net/ipv4/netfilter/arp_tables.c | 1 + net/ipv4/netfilter/ip_tables.c | 1 + .../netfilter/nf_conntrack_l3proto_ipv4.c | 1 + net/ipv6/netfilter/ip6_tables.c | 1 + net/netfilter/nf_sockopt.c | 36 ++++++------------- 8 files changed, 19 insertions(+), 28 deletions(-) diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 0eed0b7ab2df..1dd075eda595 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -88,9 +88,8 @@ struct nf_sockopt_ops int (*compat_get)(struct sock *sk, int optval, void __user *user, int *len); - /* Number of users inside set() or get(). */ - unsigned int use; - struct task_struct *cleanup_task; + /* Use the module struct to lock set/get code in place */ + struct module *owner; }; /* Each queued (to userspace) skbuff has one of these. */ diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index 4169a2a89a39..6018d0e51938 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -1513,6 +1513,7 @@ static struct nf_sockopt_ops ebt_sockopts = .get_optmin = EBT_BASE_CTL, .get_optmax = EBT_SO_GET_MAX + 1, .get = do_ebt_get_ctl, + .owner = THIS_MODULE, }; static int __init ebtables_init(void) diff --git a/net/ipv4/ipvs/ip_vs_ctl.c b/net/ipv4/ipvs/ip_vs_ctl.c index 902fd578aa3c..f656d41d8d41 100644 --- a/net/ipv4/ipvs/ip_vs_ctl.c +++ b/net/ipv4/ipvs/ip_vs_ctl.c @@ -2339,6 +2339,7 @@ static struct nf_sockopt_ops ip_vs_sockopts = { .get_optmin = IP_VS_BASE_CTL, .get_optmax = IP_VS_SO_GET_MAX+1, .get = do_ip_vs_get_ctl, + .owner = THIS_MODULE, }; diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index d1149aba9351..29114a9ccd1d 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -1161,6 +1161,7 @@ static struct nf_sockopt_ops arpt_sockopts = { .get_optmin = ARPT_BASE_CTL, .get_optmax = ARPT_SO_GET_MAX+1, .get = do_arpt_get_ctl, + .owner = THIS_MODULE, }; static int __init arp_tables_init(void) diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index e1b402c6b855..6486894f450c 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -2296,6 +2296,7 @@ static struct nf_sockopt_ops ipt_sockopts = { #ifdef CONFIG_COMPAT .compat_get = compat_do_ipt_get_ctl, #endif + .owner = THIS_MODULE, }; static struct xt_match icmp_matchstruct __read_mostly = { diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index 53cb1772f38f..f813e02aab30 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -399,6 +399,7 @@ static struct nf_sockopt_ops so_getorigdst = { .get_optmin = SO_ORIGINAL_DST, .get_optmax = SO_ORIGINAL_DST+1, .get = &getorigdst, + .owner = THIS_MODULE, }; struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4 __read_mostly = { diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index aeda617246b7..cd9df02bb85c 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -1462,6 +1462,7 @@ static struct nf_sockopt_ops ip6t_sockopts = { .get_optmin = IP6T_BASE_CTL, .get_optmax = IP6T_SO_GET_MAX+1, .get = do_ip6t_get_ctl, + .owner = THIS_MODULE, }; static struct xt_match icmp6_matchstruct __read_mostly = { diff --git a/net/netfilter/nf_sockopt.c b/net/netfilter/nf_sockopt.c index 8b8ece750313..e32761ce260c 100644 --- a/net/netfilter/nf_sockopt.c +++ b/net/netfilter/nf_sockopt.c @@ -55,18 +55,7 @@ EXPORT_SYMBOL(nf_register_sockopt); void nf_unregister_sockopt(struct nf_sockopt_ops *reg) { - /* No point being interruptible: we're probably in cleanup_module() */ - restart: mutex_lock(&nf_sockopt_mutex); - if (reg->use != 0) { - /* To be woken by nf_sockopt call... */ - /* FIXME: Stuart Young's name appears gratuitously. */ - set_current_state(TASK_UNINTERRUPTIBLE); - reg->cleanup_task = current; - mutex_unlock(&nf_sockopt_mutex); - schedule(); - goto restart; - } list_del(®->list); mutex_unlock(&nf_sockopt_mutex); } @@ -86,10 +75,11 @@ static int nf_sockopt(struct sock *sk, int pf, int val, list_for_each(i, &nf_sockopts) { ops = (struct nf_sockopt_ops *)i; if (ops->pf == pf) { + if (!try_module_get(ops->owner)) + goto out_nosup; if (get) { if (val >= ops->get_optmin && val < ops->get_optmax) { - ops->use++; mutex_unlock(&nf_sockopt_mutex); ret = ops->get(sk, val, opt, len); goto out; @@ -97,23 +87,20 @@ static int nf_sockopt(struct sock *sk, int pf, int val, } else { if (val >= ops->set_optmin && val < ops->set_optmax) { - ops->use++; mutex_unlock(&nf_sockopt_mutex); ret = ops->set(sk, val, opt, *len); goto out; } } + module_put(ops->owner); } } + out_nosup: mutex_unlock(&nf_sockopt_mutex); return -ENOPROTOOPT; out: - mutex_lock(&nf_sockopt_mutex); - ops->use--; - if (ops->cleanup_task) - wake_up_process(ops->cleanup_task); - mutex_unlock(&nf_sockopt_mutex); + module_put(ops->owner); return ret; } @@ -144,10 +131,12 @@ static int compat_nf_sockopt(struct sock *sk, int pf, int val, list_for_each(i, &nf_sockopts) { ops = (struct nf_sockopt_ops *)i; if (ops->pf == pf) { + if (!try_module_get(ops->owner)) + goto out_nosup; + if (get) { if (val >= ops->get_optmin && val < ops->get_optmax) { - ops->use++; mutex_unlock(&nf_sockopt_mutex); if (ops->compat_get) ret = ops->compat_get(sk, @@ -160,7 +149,6 @@ static int compat_nf_sockopt(struct sock *sk, int pf, int val, } else { if (val >= ops->set_optmin && val < ops->set_optmax) { - ops->use++; mutex_unlock(&nf_sockopt_mutex); if (ops->compat_set) ret = ops->compat_set(sk, @@ -171,17 +159,15 @@ static int compat_nf_sockopt(struct sock *sk, int pf, int val, goto out; } } + module_put(ops->owner); } } + out_nosup: mutex_unlock(&nf_sockopt_mutex); return -ENOPROTOOPT; out: - mutex_lock(&nf_sockopt_mutex); - ops->use--; - if (ops->cleanup_task) - wake_up_process(ops->cleanup_task); - mutex_unlock(&nf_sockopt_mutex); + module_put(ops->owner); return ret; } From e1f52208bb968291f7d9142eff60b62984b4a511 Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki Date: Tue, 11 Sep 2007 11:31:43 +0200 Subject: [PATCH 13/14] [IPv6]: Fix NULL pointer dereference in ip6_flush_pending_frames Some of skbs in sk->write_queue do not have skb->dst because we do not fill skb->dst when we allocate new skb in append_data(). BTW, I think we may not need to (or we should not) increment some stats when using corking; if 100 sendmsg() (with MSG_MORE) result in 2 packets, how many should we increment? If 100, we should set skb->dst for every queued skbs. If 1 (or 2 (*)), we increment the stats for the first queued skb and we should just skip incrementing OutDiscards for the rest of queued skbs, adn we should also impelement this semantics in other places; e.g., we should increment other stats just once, not 100 times. *: depends on the place we are discarding the datagram. I guess should just increment by 1 (or 2). Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- net/ipv6/ip6_output.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 5dead399fe64..26de3c0ea31e 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1427,8 +1427,9 @@ void ip6_flush_pending_frames(struct sock *sk) struct sk_buff *skb; while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) { - IP6_INC_STATS(ip6_dst_idev(skb->dst), - IPSTATS_MIB_OUTDISCARDS); + if (skb->dst) + IP6_INC_STATS(ip6_dst_idev(skb->dst), + IPSTATS_MIB_OUTDISCARDS); kfree_skb(skb); } From 0a9c73014415d2a84dac346c1e12169142a6ad37 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 11 Sep 2007 11:33:28 +0200 Subject: [PATCH 14/14] [INET_DIAG]: Fix oops in netlink_rcv_skb netlink_run_queue() doesn't handle multiple processes processing the queue concurrently. Serialize queue processing in inet_diag to fix a oops in netlink_rcv_skb caused by netlink_run_queue passing a NULL for the skb. BUG: unable to handle kernel NULL pointer dereference at virtual address 00000054 [349587.500454] printing eip: [349587.500457] c03318ae [349587.500459] *pde = 00000000 [349587.500464] Oops: 0000 [#1] [349587.500466] PREEMPT SMP [349587.500474] Modules linked in: w83627hf hwmon_vid i2c_isa [349587.500483] CPU: 0 [349587.500485] EIP: 0060:[] Not tainted VLI [349587.500487] EFLAGS: 00010246 (2.6.22.3 #1) [349587.500499] EIP is at netlink_rcv_skb+0xa/0x7e [349587.500506] eax: 00000000 ebx: 00000000 ecx: c148d2a0 edx: c0398819 [349587.500510] esi: 00000000 edi: c0398819 ebp: c7a21c8c esp: c7a21c80 [349587.500517] ds: 007b es: 007b fs: 00d8 gs: 0033 ss: 0068 [349587.500521] Process oidentd (pid: 17943, ti=c7a20000 task=cee231c0 task.ti=c7a20000) [349587.500527] Stack: 00000000 c7a21cac f7c8ba78 c7a21ca4 c0331962 c0398819 f7c8ba00 0000004c [349587.500542] f736f000 c7a21cb4 c03988e3 00000001 f7c8ba00 c7a21cc4 c03312a5 0000004c [349587.500558] f7c8ba00 c7a21cd4 c0330681 f7c8ba00 e4695280 c7a21d00 c03307c6 7fffffff [349587.500578] Call Trace: [349587.500581] [] show_trace_log_lvl+0x1c/0x33 [349587.500591] [] show_stack_log_lvl+0x8d/0xaa [349587.500595] [] show_registers+0x1cb/0x321 [349587.500604] [] die+0x112/0x1e1 [349587.500607] [] do_page_fault+0x229/0x565 [349587.500618] [] error_code+0x72/0x78 [349587.500625] [] netlink_run_queue+0x40/0x76 [349587.500632] [] inet_diag_rcv+0x1f/0x2c [349587.500639] [] netlink_data_ready+0x57/0x59 [349587.500643] [] netlink_sendskb+0x24/0x45 [349587.500651] [] netlink_unicast+0x100/0x116 [349587.500656] [] netlink_sendmsg+0x1c2/0x280 [349587.500664] [] sock_sendmsg+0xba/0xd5 [349587.500671] [] sys_sendmsg+0x17b/0x1e8 [349587.500676] [] sys_socketcall+0x230/0x24d [349587.500684] [] syscall_call+0x7/0xb [349587.500691] ======================= [349587.500693] Code: f0 ff 4e 18 0f 94 c0 84 c0 0f 84 66 ff ff ff 89 f0 e8 86 e2 fc ff e9 5a ff ff ff f0 ff 40 10 eb be 55 89 e5 57 89 d7 56 89 c6 53 <8b> 50 54 83 fa 10 72 55 8b 9e 9c 00 00 00 31 c9 8b 03 83 f8 0f Reported by Athanasius Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/ipv4/inet_diag.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index dbeacd8b0f90..def007ec1d6f 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -836,12 +836,16 @@ static int inet_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) return inet_diag_get_exact(skb, nlh); } +static DEFINE_MUTEX(inet_diag_mutex); + static void inet_diag_rcv(struct sock *sk, int len) { unsigned int qlen = 0; do { + mutex_lock(&inet_diag_mutex); netlink_run_queue(sk, &qlen, &inet_diag_rcv_msg); + mutex_unlock(&inet_diag_mutex); } while (qlen); }