From c197facc8ea08062f8f949aade6a33649ee06771 Mon Sep 17 00:00:00 2001 From: "hummerbliss@gmail.com" Date: Mon, 20 Apr 2009 17:12:35 +0200 Subject: [PATCH 01/40] netfilter: bridge: allow fragmentation of VLAN packets traversing a bridge br_nf_dev_queue_xmit only checks for ETH_P_IP packets for fragmenting but not VLAN packets. This results in dropping of large VLAN packets. This can be observed when connection tracking is enabled. Connection tracking re-assembles fragmented packets, and these have to re-fragmented when transmitting out. Also, make sure only refragmented packets are defragmented as per suggestion from Patrick McHardy. Signed-off-by: Saikiran Madugula Signed-off-by: Patrick McHardy --- net/bridge/br_netfilter.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index 3953ac4214c8..e4a418fcb35b 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -788,15 +788,23 @@ static unsigned int br_nf_local_out(unsigned int hook, struct sk_buff *skb, return NF_STOLEN; } +#if defined(CONFIG_NF_CONNTRACK_IPV4) || defined(CONFIG_NF_CONNTRACK_IPV4_MODULE) static int br_nf_dev_queue_xmit(struct sk_buff *skb) { - if (skb->protocol == htons(ETH_P_IP) && + if (skb->nfct != NULL && + (skb->protocol == htons(ETH_P_IP) || IS_VLAN_IP(skb)) && skb->len > skb->dev->mtu && !skb_is_gso(skb)) return ip_fragment(skb, br_dev_queue_push_xmit); else return br_dev_queue_push_xmit(skb); } +#else +static int br_nf_dev_queue_xmit(struct sk_buff *skb) +{ + return br_dev_queue_push_xmit(skb); +} +#endif /* PF_BRIDGE/POST_ROUTING ********************************************/ static unsigned int br_nf_post_routing(unsigned int hook, struct sk_buff *skb, From 5ff482940f5aa2cdc3424c4a8ea94b9833b2af5f Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Fri, 24 Apr 2009 15:37:44 +0200 Subject: [PATCH 02/40] netfilter: nf_ct_dccp/udplite: fix protocol registration error Commit d0dba725 (netfilter: ctnetlink: add callbacks to the per-proto nlattrs) changed the protocol registration function to abort if the to-be registered protocol doesn't provide a new callback function. The DCCP and UDP-Lite IPv6 protocols were missed in this conversion, add the required callback pointer. Reported-and-tested-by: Steven Jan Springl Signed-off-by: Patrick McHardy --- net/netfilter/nf_conntrack_proto_dccp.c | 1 + net/netfilter/nf_conntrack_proto_udplite.c | 1 + 2 files changed, 2 insertions(+) diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c index 50dac8dbe7d8..5411d63f31a5 100644 --- a/net/netfilter/nf_conntrack_proto_dccp.c +++ b/net/netfilter/nf_conntrack_proto_dccp.c @@ -777,6 +777,7 @@ static struct nf_conntrack_l4proto dccp_proto6 __read_mostly = { .print_conntrack = dccp_print_conntrack, #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) .to_nlattr = dccp_to_nlattr, + .nlattr_size = dccp_nlattr_size, .from_nlattr = nlattr_to_dccp, .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr, .nlattr_tuple_size = nf_ct_port_nlattr_tuple_size, diff --git a/net/netfilter/nf_conntrack_proto_udplite.c b/net/netfilter/nf_conntrack_proto_udplite.c index 4614696c1b88..0badedc542d3 100644 --- a/net/netfilter/nf_conntrack_proto_udplite.c +++ b/net/netfilter/nf_conntrack_proto_udplite.c @@ -204,6 +204,7 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite6 __read_mostly = .error = udplite_error, #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr, + .nlattr_tuple_size = nf_ct_port_nlattr_tuple_size, .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple, .nla_policy = nf_ct_port_nla_policy, #endif From 4b0706624930dc75c3b0d0df463d89759ef7de29 Mon Sep 17 00:00:00 2001 From: Laszlo Attila Toth Date: Fri, 24 Apr 2009 16:55:25 +0200 Subject: [PATCH 03/40] netfilter: Kconfig: TProxy doesn't depend on NF_CONNTRACK Signed-off-by: Laszlo Attila Toth Signed-off-by: Patrick McHardy --- net/netfilter/Kconfig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 2329c5f50551..881203c4a142 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -275,6 +275,8 @@ config NF_CT_NETLINK help This option enables support for a netlink-based userspace interface +endif # NF_CONNTRACK + # transparent proxy support config NETFILTER_TPROXY tristate "Transparent proxying support (EXPERIMENTAL)" @@ -290,8 +292,6 @@ config NETFILTER_TPROXY To compile it as a module, choose M here. If unsure, say N. -endif # NF_CONNTRACK - config NETFILTER_XTABLES tristate "Netfilter Xtables support (required for ip_tables)" default m if NETFILTER_ADVANCED=n From 71951b64a5a87c09eb6fde59ce51aaab2fdaeab2 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 24 Apr 2009 16:58:41 +0200 Subject: [PATCH 04/40] netfilter: nf_ct_dccp: add missing role attributes for DCCP This patch adds missing role attribute to the DCCP type, otherwise the creation of entries is not of any use. The attribute added is CTA_PROTOINFO_DCCP_ROLE which contains the role of the conntrack original tuple. Signed-off-by: Pablo Neira Ayuso Signed-off-by: Patrick McHardy --- include/linux/netfilter/nfnetlink_conntrack.h | 1 + net/netfilter/nf_conntrack_proto_dccp.c | 15 ++++++++++++++- 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/include/linux/netfilter/nfnetlink_conntrack.h b/include/linux/netfilter/nfnetlink_conntrack.h index 29fe9ea1d346..1a865e48b8eb 100644 --- a/include/linux/netfilter/nfnetlink_conntrack.h +++ b/include/linux/netfilter/nfnetlink_conntrack.h @@ -100,6 +100,7 @@ enum ctattr_protoinfo_tcp { enum ctattr_protoinfo_dccp { CTA_PROTOINFO_DCCP_UNSPEC, CTA_PROTOINFO_DCCP_STATE, + CTA_PROTOINFO_DCCP_ROLE, __CTA_PROTOINFO_DCCP_MAX, }; #define CTA_PROTOINFO_DCCP_MAX (__CTA_PROTOINFO_DCCP_MAX - 1) diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c index 5411d63f31a5..8e757dd53396 100644 --- a/net/netfilter/nf_conntrack_proto_dccp.c +++ b/net/netfilter/nf_conntrack_proto_dccp.c @@ -633,6 +633,8 @@ static int dccp_to_nlattr(struct sk_buff *skb, struct nlattr *nla, if (!nest_parms) goto nla_put_failure; NLA_PUT_U8(skb, CTA_PROTOINFO_DCCP_STATE, ct->proto.dccp.state); + NLA_PUT_U8(skb, CTA_PROTOINFO_DCCP_ROLE, + ct->proto.dccp.role[IP_CT_DIR_ORIGINAL]); nla_nest_end(skb, nest_parms); read_unlock_bh(&dccp_lock); return 0; @@ -644,6 +646,7 @@ nla_put_failure: static const struct nla_policy dccp_nla_policy[CTA_PROTOINFO_DCCP_MAX + 1] = { [CTA_PROTOINFO_DCCP_STATE] = { .type = NLA_U8 }, + [CTA_PROTOINFO_DCCP_ROLE] = { .type = NLA_U8 }, }; static int nlattr_to_dccp(struct nlattr *cda[], struct nf_conn *ct) @@ -661,11 +664,21 @@ static int nlattr_to_dccp(struct nlattr *cda[], struct nf_conn *ct) return err; if (!tb[CTA_PROTOINFO_DCCP_STATE] || - nla_get_u8(tb[CTA_PROTOINFO_DCCP_STATE]) >= CT_DCCP_IGNORE) + !tb[CTA_PROTOINFO_DCCP_ROLE] || + nla_get_u8(tb[CTA_PROTOINFO_DCCP_ROLE]) > CT_DCCP_ROLE_MAX || + nla_get_u8(tb[CTA_PROTOINFO_DCCP_STATE]) >= CT_DCCP_IGNORE) { return -EINVAL; + } write_lock_bh(&dccp_lock); ct->proto.dccp.state = nla_get_u8(tb[CTA_PROTOINFO_DCCP_STATE]); + if (nla_get_u8(tb[CTA_PROTOINFO_DCCP_ROLE]) == CT_DCCP_ROLE_CLIENT) { + ct->proto.dccp.role[IP_CT_DIR_ORIGINAL] = CT_DCCP_ROLE_CLIENT; + ct->proto.dccp.role[IP_CT_DIR_REPLY] = CT_DCCP_ROLE_SERVER; + } else { + ct->proto.dccp.role[IP_CT_DIR_ORIGINAL] = CT_DCCP_ROLE_SERVER; + ct->proto.dccp.role[IP_CT_DIR_REPLY] = CT_DCCP_ROLE_CLIENT; + } write_unlock_bh(&dccp_lock); return 0; } From 37e55cf0ceb8803256bf69a3e45bd668bf90b76f Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Fri, 24 Apr 2009 17:05:21 +0200 Subject: [PATCH 05/40] netfilter: xt_recent: fix stack overread in compat code Related-to: commit 325fb5b4d26038cba665dd0d8ee09555321061f0 The compat path suffers from a similar problem. It only uses a __be32 when all of the recent code uses, and expects, an nf_inet_addr everywhere. As a result, addresses stored by xt_recents were filled with whatever other stuff was on the stack following the be32. Signed-off-by: Jan Engelhardt With a minor compile fix from Roman. Reported-and-tested-by: Roman Hoog Antink Signed-off-by: Patrick McHardy --- net/netfilter/xt_recent.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c index 791e030ea903..eb0ceb846527 100644 --- a/net/netfilter/xt_recent.c +++ b/net/netfilter/xt_recent.c @@ -474,7 +474,7 @@ static ssize_t recent_old_proc_write(struct file *file, struct recent_table *t = pde->data; struct recent_entry *e; char buf[sizeof("+255.255.255.255")], *c = buf; - __be32 addr; + union nf_inet_addr addr = {}; int add; if (size > sizeof(buf)) @@ -506,14 +506,13 @@ static ssize_t recent_old_proc_write(struct file *file, add = 1; break; } - addr = in_aton(c); + addr.ip = in_aton(c); spin_lock_bh(&recent_lock); - e = recent_entry_lookup(t, (const void *)&addr, NFPROTO_IPV4, 0); + e = recent_entry_lookup(t, &addr, NFPROTO_IPV4, 0); if (e == NULL) { if (add) - recent_entry_init(t, (const void *)&addr, - NFPROTO_IPV4, 0); + recent_entry_init(t, &addr, NFPROTO_IPV4, 0); } else { if (add) recent_entry_update(t, e); From adc667e84f086aa110d810f3476c494e48eaabaa Mon Sep 17 00:00:00 2001 From: Jay Vosburgh Date: Sat, 25 Apr 2009 18:03:35 -0700 Subject: [PATCH 06/40] vlan: update vlan carrier state for admin up/down Currently, the VLAN event handler does not adjust the VLAN device's carrier state when the real device or the VLAN device is set administratively up or down. The following patch adds a transfer of operating state from the real device to the VLAN device when the real device is administratively set up or down, and sets the carrier state up or down during init, open and close of the VLAN device. This permits observers above the VLAN device that care about the carrier state (bonding's link monitor, for example) to receive updates for administrative changes by more closely mimicing the behavior of real devices. Signed-off-by: Jay Vosburgh Signed-off-by: Patrick McHardy --- net/8021q/vlan.c | 2 ++ net/8021q/vlan_dev.c | 5 +++++ 2 files changed, 7 insertions(+) diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index 2b7390e377b3..d1e10546eb85 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -492,6 +492,7 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event, continue; dev_change_flags(vlandev, flgs & ~IFF_UP); + vlan_transfer_operstate(dev, vlandev); } break; @@ -507,6 +508,7 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event, continue; dev_change_flags(vlandev, flgs | IFF_UP); + vlan_transfer_operstate(dev, vlandev); } break; diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 6b0921364014..b4b9068e55a7 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -462,6 +462,7 @@ static int vlan_dev_open(struct net_device *dev) if (vlan->flags & VLAN_FLAG_GVRP) vlan_gvrp_request_join(dev); + netif_carrier_on(dev); return 0; clear_allmulti: @@ -471,6 +472,7 @@ del_unicast: if (compare_ether_addr(dev->dev_addr, real_dev->dev_addr)) dev_unicast_delete(real_dev, dev->dev_addr, ETH_ALEN); out: + netif_carrier_off(dev); return err; } @@ -492,6 +494,7 @@ static int vlan_dev_stop(struct net_device *dev) if (compare_ether_addr(dev->dev_addr, real_dev->dev_addr)) dev_unicast_delete(real_dev, dev->dev_addr, dev->addr_len); + netif_carrier_off(dev); return 0; } @@ -612,6 +615,8 @@ static int vlan_dev_init(struct net_device *dev) struct net_device *real_dev = vlan_dev_info(dev)->real_dev; int subclass = 0; + netif_carrier_off(dev); + /* IFF_BROADCAST|IFF_MULTICAST; ??? */ dev->flags = real_dev->flags & ~(IFF_UP | IFF_PROMISC | IFF_ALLMULTI); dev->iflink = real_dev->ifindex; From a4233304bb43f87f97fc2ac9143b513814dcf094 Mon Sep 17 00:00:00 2001 From: Yevgeny Petrilin Date: Sun, 26 Apr 2009 20:41:34 +0000 Subject: [PATCH 07/40] mlx4_en: Fix cleanup flow on cq activation In case of mlx4_en_activate_cq() failure, the cleanup code would go to rx_err and try to disable unactivated rings. Signed-off-by: Yevgeny Petrilin Signed-off-by: David S. Miller --- drivers/net/mlx4/en_netdev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/mlx4/en_netdev.c b/drivers/net/mlx4/en_netdev.c index 438678ab2a10..7bcc49de1637 100644 --- a/drivers/net/mlx4/en_netdev.c +++ b/drivers/net/mlx4/en_netdev.c @@ -583,7 +583,7 @@ int mlx4_en_start_port(struct net_device *dev) err = mlx4_en_activate_cq(priv, cq); if (err) { mlx4_err(mdev, "Failed activating Rx CQ\n"); - goto rx_err; + goto cq_err; } for (j = 0; j < cq->size; j++) cq->buf[j].owner_sr_opcode = MLX4_CQE_OWNER_MASK; From 785a0982eaaeae2fbe3372d1c9c769e8156a7a5a Mon Sep 17 00:00:00 2001 From: Yevgeny Petrilin Date: Sun, 26 Apr 2009 20:42:57 +0000 Subject: [PATCH 08/40] mlx4_en: Handle page allocation failure during receive If we failed to allocate new fragments for receive buffer, the packet should be dropped and packets should be reused. Signed-off-by: Yevgeny Petrilin Signed-off-by: David S. Miller --- drivers/net/mlx4/en_rx.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/mlx4/en_rx.c b/drivers/net/mlx4/en_rx.c index 0cbb78ca7b29..7942c4d3cd88 100644 --- a/drivers/net/mlx4/en_rx.c +++ b/drivers/net/mlx4/en_rx.c @@ -610,6 +610,10 @@ static struct sk_buff *mlx4_en_rx_skb(struct mlx4_en_priv *priv, used_frags = mlx4_en_complete_rx_desc(priv, rx_desc, skb_frags, skb_shinfo(skb)->frags, page_alloc, length); + if (unlikely(!used_frags)) { + kfree_skb(skb); + return NULL; + } skb_shinfo(skb)->nr_frags = used_frags; /* Copy headers into the skb linear buffer */ From c759a6b4e1cae6aff71f58c9c85404ebcd81b6e0 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Mon, 27 Apr 2009 02:36:20 -0700 Subject: [PATCH 09/40] net: Fix LL_MAX_HEADER for CONFIG_TR_MODULE Unless I miss anything this should fix a bug. Signed-off-by: Adrian Bunk Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 2e7783f4a755..453be9a674c0 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -104,7 +104,7 @@ struct wireless_dev; # else # define LL_MAX_HEADER 96 # endif -#elif defined(CONFIG_TR) +#elif defined(CONFIG_TR) || defined(CONFIG_TR_MODULE) # define LL_MAX_HEADER 48 #else # define LL_MAX_HEADER 32 From 8f955d7f042e4ac44891a400d5000928f8db9f58 Mon Sep 17 00:00:00 2001 From: Ayaz Abdulla Date: Sat, 25 Apr 2009 09:17:56 +0000 Subject: [PATCH 10/40] forcedeth: tx timeout fix This patch fixes the tx_timeout() to properly handle the clean up of the tx ring. It also sets the tx put pointer back to the correct position to be in sync with HW. Signed-off-by: Ayaz Abdulla Signed-off-by: David S. Miller --- drivers/net/forcedeth.c | 31 +++++++++++++++++++++---------- 1 file changed, 21 insertions(+), 10 deletions(-) diff --git a/drivers/net/forcedeth.c b/drivers/net/forcedeth.c index 11d5db16ed9c..f9a846b1b92f 100644 --- a/drivers/net/forcedeth.c +++ b/drivers/net/forcedeth.c @@ -1880,6 +1880,7 @@ static void nv_init_tx(struct net_device *dev) np->tx_pkts_in_progress = 0; np->tx_change_owner = NULL; np->tx_end_flip = NULL; + np->tx_stop = 0; for (i = 0; i < np->tx_ring_size; i++) { if (!nv_optimized(np)) { @@ -2530,6 +2531,8 @@ static void nv_tx_timeout(struct net_device *dev) struct fe_priv *np = netdev_priv(dev); u8 __iomem *base = get_hwbase(dev); u32 status; + union ring_type put_tx; + int saved_tx_limit; if (np->msi_flags & NV_MSI_X_ENABLED) status = readl(base + NvRegMSIXIrqStatus) & NVREG_IRQSTAT_MASK; @@ -2589,24 +2592,32 @@ static void nv_tx_timeout(struct net_device *dev) /* 1) stop tx engine */ nv_stop_tx(dev); - /* 2) check that the packets were not sent already: */ + /* 2) complete any outstanding tx and do not give HW any limited tx pkts */ + saved_tx_limit = np->tx_limit; + np->tx_limit = 0; /* prevent giving HW any limited pkts */ + np->tx_stop = 0; /* prevent waking tx queue */ if (!nv_optimized(np)) nv_tx_done(dev, np->tx_ring_size); else nv_tx_done_optimized(dev, np->tx_ring_size); - /* 3) if there are dead entries: clear everything */ - if (np->get_tx_ctx != np->put_tx_ctx) { - printk(KERN_DEBUG "%s: tx_timeout: dead entries!\n", dev->name); - nv_drain_tx(dev); - nv_init_tx(dev); - setup_hw_rings(dev, NV_SETUP_TX_RING); - } + /* save current HW postion */ + if (np->tx_change_owner) + put_tx.ex = np->tx_change_owner->first_tx_desc; + else + put_tx = np->put_tx; - netif_wake_queue(dev); + /* 3) clear all tx state */ + nv_drain_tx(dev); + nv_init_tx(dev); - /* 4) restart tx engine */ + /* 4) restore state to current HW position */ + np->get_tx = np->put_tx = put_tx; + np->tx_limit = saved_tx_limit; + + /* 5) restart tx engine */ nv_start_tx(dev); + netif_wake_queue(dev); spin_unlock_irq(&np->lock); } From 6a783c9067e3f71aac61a9262fe42c1f68efd4fc Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Mon, 27 Apr 2009 02:58:59 -0700 Subject: [PATCH 11/40] xfrm: wrong hash value for temporary SA When kernel inserts a temporary SA for IKE, it uses the wrong hash value for dst list. Two hash values were calcultated before: one with source address and one with a wildcard source address. Bug hinted by Junwei Zhang Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller --- net/xfrm/xfrm_state.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 82271720d970..5f1f86565f16 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -794,7 +794,7 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, { static xfrm_address_t saddr_wildcard = { }; struct net *net = xp_net(pol); - unsigned int h; + unsigned int h, h_wildcard; struct hlist_node *entry; struct xfrm_state *x, *x0, *to_put; int acquire_in_progress = 0; @@ -819,8 +819,8 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, if (best) goto found; - h = xfrm_dst_hash(net, daddr, &saddr_wildcard, tmpl->reqid, family); - hlist_for_each_entry(x, entry, net->xfrm.state_bydst+h, bydst) { + h_wildcard = xfrm_dst_hash(net, daddr, &saddr_wildcard, tmpl->reqid, family); + hlist_for_each_entry(x, entry, net->xfrm.state_bydst+h_wildcard, bydst) { if (x->props.family == family && x->props.reqid == tmpl->reqid && !(x->props.flags & XFRM_STATE_WILDRECV) && From ae0e8e82205c903978a79ebf5e31c670b61fa5b4 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Mon, 27 Apr 2009 03:04:58 -0700 Subject: [PATCH 12/40] veth: prevent oops caused by netdev destructor From: Stephen Hemminger The veth driver will oops if sysfs hooks are open while module is removed. The net device destructor can not point to code in a module; basically there are only two possible safe values: NULL - no destructor, or free_netdev - free on last use Signed-off-by: David S. Miller --- drivers/net/veth.c | 41 ++++++++++++++++------------------------- 1 file changed, 16 insertions(+), 25 deletions(-) diff --git a/drivers/net/veth.c b/drivers/net/veth.c index 015db1cece72..8e56fcf0a0e3 100644 --- a/drivers/net/veth.c +++ b/drivers/net/veth.c @@ -210,14 +210,11 @@ rx_drop: static struct net_device_stats *veth_get_stats(struct net_device *dev) { - struct veth_priv *priv; - struct net_device_stats *dev_stats; - int cpu; + struct veth_priv *priv = netdev_priv(dev); + struct net_device_stats *dev_stats = &dev->stats; + unsigned int cpu; struct veth_net_stats *stats; - priv = netdev_priv(dev); - dev_stats = &dev->stats; - dev_stats->rx_packets = 0; dev_stats->tx_packets = 0; dev_stats->rx_bytes = 0; @@ -225,16 +222,17 @@ static struct net_device_stats *veth_get_stats(struct net_device *dev) dev_stats->tx_dropped = 0; dev_stats->rx_dropped = 0; - for_each_online_cpu(cpu) { - stats = per_cpu_ptr(priv->stats, cpu); + if (priv->stats) + for_each_online_cpu(cpu) { + stats = per_cpu_ptr(priv->stats, cpu); - dev_stats->rx_packets += stats->rx_packets; - dev_stats->tx_packets += stats->tx_packets; - dev_stats->rx_bytes += stats->rx_bytes; - dev_stats->tx_bytes += stats->tx_bytes; - dev_stats->tx_dropped += stats->tx_dropped; - dev_stats->rx_dropped += stats->rx_dropped; - } + dev_stats->rx_packets += stats->rx_packets; + dev_stats->tx_packets += stats->tx_packets; + dev_stats->rx_bytes += stats->rx_bytes; + dev_stats->tx_bytes += stats->tx_bytes; + dev_stats->tx_dropped += stats->tx_dropped; + dev_stats->rx_dropped += stats->rx_dropped; + } return dev_stats; } @@ -261,6 +259,8 @@ static int veth_close(struct net_device *dev) netif_carrier_off(dev); netif_carrier_off(priv->peer); + free_percpu(priv->stats); + priv->stats = NULL; return 0; } @@ -291,15 +291,6 @@ static int veth_dev_init(struct net_device *dev) return 0; } -static void veth_dev_free(struct net_device *dev) -{ - struct veth_priv *priv; - - priv = netdev_priv(dev); - free_percpu(priv->stats); - free_netdev(dev); -} - static const struct net_device_ops veth_netdev_ops = { .ndo_init = veth_dev_init, .ndo_open = veth_open, @@ -317,7 +308,7 @@ static void veth_setup(struct net_device *dev) dev->netdev_ops = &veth_netdev_ops; dev->ethtool_ops = &veth_ethtool_ops; dev->features |= NETIF_F_LLTX; - dev->destructor = veth_dev_free; + dev->destructor = free_netdev; } /* From 495dce123ceabbae035552437fcaa0f69247ff08 Mon Sep 17 00:00:00 2001 From: "Waskiewicz Jr, Peter P" Date: Thu, 23 Apr 2009 11:15:18 +0000 Subject: [PATCH 13/40] ixgbe: Fix WoL functionality for 82599 KX4 devices The current code writes the PME enabled bit in PCI config space which is wrong. This was needed for pre-release hardware, and was not removed from the driver. Also, we need to clear the WUS (wake up status) after we resume. Otherwise we can't wake for the same event again since it's still asserted in the hardware. Plus, the multicast lists were being written improperly, causing multicast WoL to fail. Signed-off-by: Peter P Waskiewicz Jr Signed-off-by: Jeff Kirsher Signed-off-by: David S. Miller --- drivers/net/ixgbe/ixgbe_common.c | 51 ++------------------------------ drivers/net/ixgbe/ixgbe_main.c | 10 +++---- 2 files changed, 6 insertions(+), 55 deletions(-) diff --git a/drivers/net/ixgbe/ixgbe_common.c b/drivers/net/ixgbe/ixgbe_common.c index 5567519676d5..186a65069b33 100644 --- a/drivers/net/ixgbe/ixgbe_common.c +++ b/drivers/net/ixgbe/ixgbe_common.c @@ -50,7 +50,6 @@ static u16 ixgbe_calc_eeprom_checksum(struct ixgbe_hw *hw); static void ixgbe_enable_rar(struct ixgbe_hw *hw, u32 index); static void ixgbe_disable_rar(struct ixgbe_hw *hw, u32 index); static s32 ixgbe_mta_vector(struct ixgbe_hw *hw, u8 *mc_addr); -static void ixgbe_add_mc_addr(struct ixgbe_hw *hw, u8 *mc_addr); static void ixgbe_add_uc_addr(struct ixgbe_hw *hw, u8 *addr, u32 vmdq); /** @@ -1377,8 +1376,7 @@ s32 ixgbe_update_uc_addr_list_generic(struct ixgbe_hw *hw, u8 *addr_list, * Clear accounting of old secondary address list, * don't count RAR[0] */ - uc_addr_in_use = hw->addr_ctrl.rar_used_count - - hw->addr_ctrl.mc_addr_in_rar_count - 1; + uc_addr_in_use = hw->addr_ctrl.rar_used_count - 1; hw->addr_ctrl.rar_used_count -= uc_addr_in_use; hw->addr_ctrl.overflow_promisc = 0; @@ -1492,40 +1490,6 @@ static void ixgbe_set_mta(struct ixgbe_hw *hw, u8 *mc_addr) IXGBE_WRITE_REG(hw, IXGBE_MTA(vector_reg), mta_reg); } -/** - * ixgbe_add_mc_addr - Adds a multicast address. - * @hw: pointer to hardware structure - * @mc_addr: new multicast address - * - * Adds it to unused receive address register or to the multicast table. - **/ -static void ixgbe_add_mc_addr(struct ixgbe_hw *hw, u8 *mc_addr) -{ - u32 rar_entries = hw->mac.num_rar_entries; - u32 rar; - - hw_dbg(hw, " MC Addr =%.2X %.2X %.2X %.2X %.2X %.2X\n", - mc_addr[0], mc_addr[1], mc_addr[2], - mc_addr[3], mc_addr[4], mc_addr[5]); - - /* - * Place this multicast address in the RAR if there is room, - * else put it in the MTA - */ - if (hw->addr_ctrl.rar_used_count < rar_entries) { - /* use RAR from the end up for multicast */ - rar = rar_entries - hw->addr_ctrl.mc_addr_in_rar_count - 1; - hw->mac.ops.set_rar(hw, rar, mc_addr, 0, IXGBE_RAH_AV); - hw_dbg(hw, "Added a multicast address to RAR[%d]\n", rar); - hw->addr_ctrl.rar_used_count++; - hw->addr_ctrl.mc_addr_in_rar_count++; - } else { - ixgbe_set_mta(hw, mc_addr); - } - - hw_dbg(hw, "ixgbe_add_mc_addr Complete\n"); -} - /** * ixgbe_update_mc_addr_list_generic - Updates MAC list of multicast addresses * @hw: pointer to hardware structure @@ -1542,7 +1506,6 @@ s32 ixgbe_update_mc_addr_list_generic(struct ixgbe_hw *hw, u8 *mc_addr_list, u32 mc_addr_count, ixgbe_mc_addr_itr next) { u32 i; - u32 rar_entries = hw->mac.num_rar_entries; u32 vmdq; /* @@ -1550,18 +1513,8 @@ s32 ixgbe_update_mc_addr_list_generic(struct ixgbe_hw *hw, u8 *mc_addr_list, * use. */ hw->addr_ctrl.num_mc_addrs = mc_addr_count; - hw->addr_ctrl.rar_used_count -= hw->addr_ctrl.mc_addr_in_rar_count; - hw->addr_ctrl.mc_addr_in_rar_count = 0; hw->addr_ctrl.mta_in_use = 0; - /* Zero out the other receive addresses. */ - hw_dbg(hw, "Clearing RAR[%d-%d]\n", hw->addr_ctrl.rar_used_count, - rar_entries - 1); - for (i = hw->addr_ctrl.rar_used_count; i < rar_entries; i++) { - IXGBE_WRITE_REG(hw, IXGBE_RAL(i), 0); - IXGBE_WRITE_REG(hw, IXGBE_RAH(i), 0); - } - /* Clear the MTA */ hw_dbg(hw, " Clearing MTA\n"); for (i = 0; i < hw->mac.mcft_size; i++) @@ -1570,7 +1523,7 @@ s32 ixgbe_update_mc_addr_list_generic(struct ixgbe_hw *hw, u8 *mc_addr_list, /* Add the new addresses */ for (i = 0; i < mc_addr_count; i++) { hw_dbg(hw, " Adding the multicast addresses:\n"); - ixgbe_add_mc_addr(hw, next(hw, &mc_addr_list, &vmdq)); + ixgbe_set_mta(hw, next(hw, &mc_addr_list, &vmdq)); } /* Enable mta */ diff --git a/drivers/net/ixgbe/ixgbe_main.c b/drivers/net/ixgbe/ixgbe_main.c index 01884256f4c9..07e778d3e5d2 100644 --- a/drivers/net/ixgbe/ixgbe_main.c +++ b/drivers/net/ixgbe/ixgbe_main.c @@ -3646,6 +3646,8 @@ static int ixgbe_resume(struct pci_dev *pdev) ixgbe_reset(adapter); + IXGBE_WRITE_REG(&adapter->hw, IXGBE_WUS, ~0); + if (netif_running(netdev)) { err = ixgbe_open(adapter->netdev); if (err) @@ -4575,7 +4577,6 @@ static int __devinit ixgbe_probe(struct pci_dev *pdev, const struct ixgbe_info *ii = ixgbe_info_tbl[ent->driver_data]; static int cards_found; int i, err, pci_using_dac; - u16 pm_value = 0; u32 part_num, eec; err = pci_enable_device(pdev); @@ -4763,11 +4764,8 @@ static int __devinit ixgbe_probe(struct pci_dev *pdev, switch (pdev->device) { case IXGBE_DEV_ID_82599_KX4: -#define IXGBE_PCIE_PMCSR 0x44 - adapter->wol = IXGBE_WUFC_MAG; - pci_read_config_word(pdev, IXGBE_PCIE_PMCSR, &pm_value); - pci_write_config_word(pdev, IXGBE_PCIE_PMCSR, - (pm_value | (1 << 8))); + adapter->wol = (IXGBE_WUFC_MAG | IXGBE_WUFC_EX | + IXGBE_WUFC_MC | IXGBE_WUFC_BC); break; default: adapter->wol = 0; From 7ced70c47f68ad672f50781de5adc6d41e6d2866 Mon Sep 17 00:00:00 2001 From: Tilman Schmidt Date: Thu, 23 Apr 2009 02:24:21 +0000 Subject: [PATCH 14/40] update Documentation/isdn/00-INDEX After the merging of mISDN, state which files refer only to the old isdn4linux subsystem. Also add a few missing files. Signed-off-by: Tilman Schmidt Signed-off-by: Karsten Keil Signed-off-by: David S. Miller --- Documentation/isdn/00-INDEX | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/Documentation/isdn/00-INDEX b/Documentation/isdn/00-INDEX index 9fee5f2e5c62..d9660ee5ab27 100644 --- a/Documentation/isdn/00-INDEX +++ b/Documentation/isdn/00-INDEX @@ -2,8 +2,12 @@ - this file (info on ISDN implementation for Linux) CREDITS - list of the kind folks that brought you this stuff. +HiSax.cert + - information about the ITU approval certification of the HiSax driver. INTERFACE - - description of Linklevel and Hardwarelevel ISDN interface. + - description of isdn4linux Link Level and Hardware Level interfaces. +INTERFACE.fax + - description of the fax subinterface of isdn4linux. README - general info on what you need and what to do for Linux ISDN. README.FAQ @@ -12,6 +16,8 @@ README.audio - info for running audio over ISDN. README.fax - info for using Fax over ISDN. +README.gigaset + - info on the drivers for Siemens Gigaset ISDN adapters. README.icn - info on the ICN-ISDN-card and its driver. README.HiSax @@ -37,7 +43,8 @@ README.diversion README.sc - info on driver for Spellcaster cards. README.x25 - _ info for running X.25 over ISDN. + - info for running X.25 over ISDN. README.hysdn - - info on driver for Hypercope active HYSDN cards - + - info on driver for Hypercope active HYSDN cards +README.mISDN + - info on the Modular ISDN subsystem (mISDN). From 554f200e22a13e19bd407d0037e41be0ec8a0a2e Mon Sep 17 00:00:00 2001 From: Tilman Schmidt Date: Thu, 23 Apr 2009 02:24:21 +0000 Subject: [PATCH 15/40] Documentation/isdn/INTERFACE.CAPI isdn: document Kernel CAPI driver interface Create a file Documentation/isdn/INTERFACE.CAPI describing the interface between the kernel CAPI subsystem and ISDN device drivers, analogous to the existing Documentation/isdn/INTERFACE for the old isdn4linux subsystem. Also add kerneldoc comments to the exported functions in drivers/isdn/capi/kcapi.c. Impact: Documentation Signed-off-by: Tilman Schmidt Signed-off-by: Karsten Keil Signed-off-by: David S. Miller --- Documentation/isdn/00-INDEX | 2 + Documentation/isdn/INTERFACE.CAPI | 207 ++++++++++++++++++++++++++++++ drivers/isdn/capi/kcapi.c | 171 ++++++++++++++++++++++++ 3 files changed, 380 insertions(+) create mode 100644 Documentation/isdn/INTERFACE.CAPI diff --git a/Documentation/isdn/00-INDEX b/Documentation/isdn/00-INDEX index d9660ee5ab27..5a2d69989a8c 100644 --- a/Documentation/isdn/00-INDEX +++ b/Documentation/isdn/00-INDEX @@ -8,6 +8,8 @@ INTERFACE - description of isdn4linux Link Level and Hardware Level interfaces. INTERFACE.fax - description of the fax subinterface of isdn4linux. +INTERFACE.CAPI + - description of kernel CAPI Link Level to Hardware Level interface. README - general info on what you need and what to do for Linux ISDN. README.FAQ diff --git a/Documentation/isdn/INTERFACE.CAPI b/Documentation/isdn/INTERFACE.CAPI new file mode 100644 index 000000000000..8947ffcda16e --- /dev/null +++ b/Documentation/isdn/INTERFACE.CAPI @@ -0,0 +1,207 @@ +Kernel CAPI Interface to Hardware Drivers +----------------------------------------- + +1. Overview + +Kernel CAPI operates as a dispatching layer between CAPI applications and CAPI +hardware drivers. Hardware drivers register ISDN devices (controllers, in CAPI +lingo) with Kernel CAPI to indicate their readiness to provide their service +to CAPI applications. CAPI applications also register with Kernel CAPI, +requesting association with a CAPI device. Kernel CAPI then dispatches the +application registration to an available device, forwarding it to the +corresponding hardware driver. Kernel CAPI then forwards CAPI messages in both +directions between the application and the hardware driver. + + +2. Driver and Device Registration + +CAPI drivers optionally register themselves with Kernel CAPI by calling the +Kernel CAPI function register_capi_driver() with a pointer to a struct +capi_driver. This structure must be filled with the name and revision of the +driver, and optionally a pointer to a callback function, add_card(). The +registration can be revoked by calling the function unregister_capi_driver() +with a pointer to the same struct capi_driver. + +CAPI drivers must register each of the ISDN devices they control with Kernel +CAPI by calling the Kernel CAPI function attach_capi_ctr() with a pointer to a +struct capi_ctr before they can be used. This structure must be filled with +the names of the driver and controller, and a number of callback function +pointers which are subsequently used by Kernel CAPI for communicating with the +driver. The registration can be revoked by calling the function +detach_capi_ctr() with a pointer to the same struct capi_ctr. + +Before the device can be actually used, the driver must fill in the device +information fields 'manu', 'version', 'profile' and 'serial' in the capi_ctr +structure of the device, and signal its readiness by calling capi_ctr_ready(). +From then on, Kernel CAPI may call the registered callback functions for the +device. + +If the device becomes unusable for any reason (shutdown, disconnect ...), the +driver has to call capi_ctr_reseted(). This will prevent further calls to the +callback functions by Kernel CAPI. + + +3. Application Registration and Communication + +Kernel CAPI forwards registration requests from applications (calls to CAPI +operation CAPI_REGISTER) to an appropriate hardware driver by calling its +register_appl() callback function. A unique Application ID (ApplID, u16) is +allocated by Kernel CAPI and passed to register_appl() along with the +parameter structure provided by the application. This is analogous to the +open() operation on regular files or character devices. + +After a successful return from register_appl(), CAPI messages from the +application may be passed to the driver for the device via calls to the +send_message() callback function. The CAPI message to send is stored in the +data portion of a skb. Conversely, the driver may call Kernel CAPI's +capi_ctr_handle_message() function to pass a received CAPI message to Kernel +CAPI for forwarding to an application, specifying its ApplID. + +Format and semantics of CAPI messages are specified in the CAPI 2.0 standard. + +Deregistration requests (CAPI operation CAPI_RELEASE) from applications are +forwarded as calls to the release_appl() callback function, passing the same +ApplID as with register_appl(). After return from release_appl(), no CAPI +messages for that application may be passed to or from the device anymore. + + +4. Data Structures + +4.1 struct capi_driver + +This structure describes a Kernel CAPI driver itself. It is used in the +register_capi_driver() and unregister_capi_driver() functions, and contains +the following non-private fields, all to be set by the driver before calling +register_capi_driver(): + +char name[32] + the name of the driver, as a zero terminated ASCII string +char revision[32] + the revision number of the driver, as a zero terminated ASCII string +int (*add_card)(struct capi_driver *driver, capicardparams *data) + a callback function pointer (may be NULL) + + +4.2 struct capi_ctr + +This structure describes an ISDN device (controller) handled by a Kernel CAPI +driver. After registration via the attach_capi_ctr() function it is passed to +all controller specific lower layer interface and callback functions to +identify the controller to operate on. + +It contains the following non-private fields: + +- to be set by the driver before calling attach_capi_ctr(): + +struct module *owner + pointer to the driver module owning the device + +void *driverdata + an opaque pointer to driver specific data, not touched by Kernel CAPI + +char name[32] + the name of the controller, as a zero terminated ASCII string + +char *driver_name + the name of the driver, as a zero terminated ASCII string + +int (*load_firmware)(struct capi_ctr *ctrlr, capiloaddata *ldata) + (optional) pointer to a callback function for sending firmware and + configuration data to the device + +void (*reset_ctr)(struct capi_ctr *ctrlr) + pointer to a callback function for performing a reset on the device, + releasing all registered applications + +void (*register_appl)(struct capi_ctr *ctrlr, u16 applid, + capi_register_params *rparam) +void (*release_appl)(struct capi_ctr *ctrlr, u16 applid) + pointers to callback functions for registration and deregistration of + applications with the device + +u16 (*send_message)(struct capi_ctr *ctrlr, struct sk_buff *skb) + pointer to a callback function for sending a CAPI message to the + device + +char *(*procinfo)(struct capi_ctr *ctrlr) + pointer to a callback function returning the entry for the device in + the CAPI controller info table, /proc/capi/controller + +read_proc_t *ctr_read_proc + pointer to the read_proc callback function for the device's proc file + system entry, /proc/capi/controllers/; will be called with a + pointer to the device's capi_ctr structure as the last (data) argument + +- to be filled in before calling capi_ctr_ready(): + +u8 manu[CAPI_MANUFACTURER_LEN] + value to return for CAPI_GET_MANUFACTURER + +capi_version version + value to return for CAPI_GET_VERSION + +capi_profile profile + value to return for CAPI_GET_PROFILE + +u8 serial[CAPI_SERIAL_LEN] + value to return for CAPI_GET_SERIAL + + +5. Lower Layer Interface Functions + +(declared in ) + +void register_capi_driver(struct capi_driver *drvr) +void unregister_capi_driver(struct capi_driver *drvr) + register/unregister a driver with Kernel CAPI + +int attach_capi_ctr(struct capi_ctr *ctrlr) +int detach_capi_ctr(struct capi_ctr *ctrlr) + register/unregister a device (controller) with Kernel CAPI + +void capi_ctr_ready(struct capi_ctr *ctrlr) +void capi_ctr_reseted(struct capi_ctr *ctrlr) + signal controller ready/not ready + +void capi_ctr_suspend_output(struct capi_ctr *ctrlr) +void capi_ctr_resume_output(struct capi_ctr *ctrlr) + signal suspend/resume + +void capi_ctr_handle_message(struct capi_ctr * ctrlr, u16 applid, + struct sk_buff *skb) + pass a received CAPI message to Kernel CAPI + for forwarding to the specified application + + +6. Helper Functions and Macros + +Library functions (from ): + +void capilib_new_ncci(struct list_head *head, u16 applid, + u32 ncci, u32 winsize) +void capilib_free_ncci(struct list_head *head, u16 applid, u32 ncci) +void capilib_release_appl(struct list_head *head, u16 applid) +void capilib_release(struct list_head *head) +void capilib_data_b3_conf(struct list_head *head, u16 applid, + u32 ncci, u16 msgid) +u16 capilib_data_b3_req(struct list_head *head, u16 applid, + u32 ncci, u16 msgid) + + +Macros to extract/set element values from/in a CAPI message header +(from ): + +Get Macro Set Macro Element (Type) + +CAPIMSG_LEN(m) CAPIMSG_SETLEN(m, len) Total Length (u16) +CAPIMSG_APPID(m) CAPIMSG_SETAPPID(m, applid) ApplID (u16) +CAPIMSG_COMMAND(m) CAPIMSG_SETCOMMAND(m,cmd) Command (u8) +CAPIMSG_SUBCOMMAND(m) CAPIMSG_SETSUBCOMMAND(m, cmd) Subcommand (u8) +CAPIMSG_CMD(m) - Command*256 + + Subcommand (u16) +CAPIMSG_MSGID(m) CAPIMSG_SETMSGID(m, msgid) Message Number (u16) + +CAPIMSG_CONTROL(m) CAPIMSG_SETCONTROL(m, contr) Controller/PLCI/NCCI + (u32) +CAPIMSG_DATALEN(m) CAPIMSG_SETDATALEN(m, len) Data Length (u16) + diff --git a/drivers/isdn/capi/kcapi.c b/drivers/isdn/capi/kcapi.c index 5360c4fd4739..f33170368cd1 100644 --- a/drivers/isdn/capi/kcapi.c +++ b/drivers/isdn/capi/kcapi.c @@ -270,6 +270,15 @@ static void recv_handler(struct work_struct *work) mutex_unlock(&ap->recv_mtx); } +/** + * capi_ctr_handle_message() - handle incoming CAPI message + * @card: controller descriptor structure. + * @appl: application ID. + * @skb: message. + * + * Called by hardware driver to pass a CAPI message to the application. + */ + void capi_ctr_handle_message(struct capi_ctr * card, u16 appl, struct sk_buff *skb) { struct capi20_appl *ap; @@ -348,6 +357,13 @@ error: EXPORT_SYMBOL(capi_ctr_handle_message); +/** + * capi_ctr_ready() - signal CAPI controller ready + * @card: controller descriptor structure. + * + * Called by hardware driver to signal that the controller is up and running. + */ + void capi_ctr_ready(struct capi_ctr * card) { card->cardstate = CARD_RUNNING; @@ -360,6 +376,14 @@ void capi_ctr_ready(struct capi_ctr * card) EXPORT_SYMBOL(capi_ctr_ready); +/** + * capi_ctr_reseted() - signal CAPI controller reset + * @card: controller descriptor structure. + * + * Called by hardware driver to signal that the controller is down and + * unavailable for use. + */ + void capi_ctr_reseted(struct capi_ctr * card) { u16 appl; @@ -391,6 +415,13 @@ void capi_ctr_reseted(struct capi_ctr * card) EXPORT_SYMBOL(capi_ctr_reseted); +/** + * capi_ctr_suspend_output() - suspend controller + * @card: controller descriptor structure. + * + * Called by hardware driver to stop data flow. + */ + void capi_ctr_suspend_output(struct capi_ctr *card) { if (!card->blocked) { @@ -401,6 +432,13 @@ void capi_ctr_suspend_output(struct capi_ctr *card) EXPORT_SYMBOL(capi_ctr_suspend_output); +/** + * capi_ctr_resume_output() - resume controller + * @card: controller descriptor structure. + * + * Called by hardware driver to resume data flow. + */ + void capi_ctr_resume_output(struct capi_ctr *card) { if (card->blocked) { @@ -413,6 +451,14 @@ EXPORT_SYMBOL(capi_ctr_resume_output); /* ------------------------------------------------------------- */ +/** + * attach_capi_ctr() - register CAPI controller + * @card: controller descriptor structure. + * + * Called by hardware driver to register a controller with the CAPI subsystem. + * Return value: 0 on success, error code < 0 on error + */ + int attach_capi_ctr(struct capi_ctr *card) { @@ -459,6 +505,15 @@ attach_capi_ctr(struct capi_ctr *card) EXPORT_SYMBOL(attach_capi_ctr); +/** + * detach_capi_ctr() - unregister CAPI controller + * @card: controller descriptor structure. + * + * Called by hardware driver to remove the registration of a controller + * with the CAPI subsystem. + * Return value: 0 on success, error code < 0 on error + */ + int detach_capi_ctr(struct capi_ctr *card) { if (card->cardstate != CARD_DETECTED) @@ -479,6 +534,13 @@ int detach_capi_ctr(struct capi_ctr *card) EXPORT_SYMBOL(detach_capi_ctr); +/** + * register_capi_driver() - register CAPI driver + * @driver: driver descriptor structure. + * + * Called by hardware driver to register itself with the CAPI subsystem. + */ + void register_capi_driver(struct capi_driver *driver) { unsigned long flags; @@ -490,6 +552,13 @@ void register_capi_driver(struct capi_driver *driver) EXPORT_SYMBOL(register_capi_driver); +/** + * unregister_capi_driver() - unregister CAPI driver + * @driver: driver descriptor structure. + * + * Called by hardware driver to unregister itself from the CAPI subsystem. + */ + void unregister_capi_driver(struct capi_driver *driver) { unsigned long flags; @@ -505,6 +574,13 @@ EXPORT_SYMBOL(unregister_capi_driver); /* -------- CAPI2.0 Interface ---------------------------------- */ /* ------------------------------------------------------------- */ +/** + * capi20_isinstalled() - CAPI 2.0 operation CAPI_INSTALLED + * + * Return value: CAPI result code (CAPI_NOERROR if at least one ISDN controller + * is ready for use, CAPI_REGNOTINSTALLED otherwise) + */ + u16 capi20_isinstalled(void) { int i; @@ -517,6 +593,18 @@ u16 capi20_isinstalled(void) EXPORT_SYMBOL(capi20_isinstalled); +/** + * capi20_register() - CAPI 2.0 operation CAPI_REGISTER + * @ap: CAPI application descriptor structure. + * + * Register an application's presence with CAPI. + * A unique application ID is assigned and stored in @ap->applid. + * After this function returns successfully, the message receive + * callback function @ap->recv_message() may be called at any time + * until capi20_release() has been called for the same @ap. + * Return value: CAPI result code + */ + u16 capi20_register(struct capi20_appl *ap) { int i; @@ -571,6 +659,16 @@ u16 capi20_register(struct capi20_appl *ap) EXPORT_SYMBOL(capi20_register); +/** + * capi20_release() - CAPI 2.0 operation CAPI_RELEASE + * @ap: CAPI application descriptor structure. + * + * Terminate an application's registration with CAPI. + * After this function returns successfully, the message receive + * callback function @ap->recv_message() will no longer be called. + * Return value: CAPI result code + */ + u16 capi20_release(struct capi20_appl *ap) { int i; @@ -603,6 +701,15 @@ u16 capi20_release(struct capi20_appl *ap) EXPORT_SYMBOL(capi20_release); +/** + * capi20_put_message() - CAPI 2.0 operation CAPI_PUT_MESSAGE + * @ap: CAPI application descriptor structure. + * @skb: CAPI message. + * + * Transfer a single message to CAPI. + * Return value: CAPI result code + */ + u16 capi20_put_message(struct capi20_appl *ap, struct sk_buff *skb) { struct capi_ctr *card; @@ -668,6 +775,16 @@ u16 capi20_put_message(struct capi20_appl *ap, struct sk_buff *skb) EXPORT_SYMBOL(capi20_put_message); +/** + * capi20_get_manufacturer() - CAPI 2.0 operation CAPI_GET_MANUFACTURER + * @contr: controller number. + * @buf: result buffer (64 bytes). + * + * Retrieve information about the manufacturer of the specified ISDN controller + * or (for @contr == 0) the driver itself. + * Return value: CAPI result code + */ + u16 capi20_get_manufacturer(u32 contr, u8 *buf) { struct capi_ctr *card; @@ -685,6 +802,16 @@ u16 capi20_get_manufacturer(u32 contr, u8 *buf) EXPORT_SYMBOL(capi20_get_manufacturer); +/** + * capi20_get_version() - CAPI 2.0 operation CAPI_GET_VERSION + * @contr: controller number. + * @verp: result structure. + * + * Retrieve version information for the specified ISDN controller + * or (for @contr == 0) the driver itself. + * Return value: CAPI result code + */ + u16 capi20_get_version(u32 contr, struct capi_version *verp) { struct capi_ctr *card; @@ -703,6 +830,16 @@ u16 capi20_get_version(u32 contr, struct capi_version *verp) EXPORT_SYMBOL(capi20_get_version); +/** + * capi20_get_serial() - CAPI 2.0 operation CAPI_GET_SERIAL_NUMBER + * @contr: controller number. + * @serial: result buffer (8 bytes). + * + * Retrieve the serial number of the specified ISDN controller + * or (for @contr == 0) the driver itself. + * Return value: CAPI result code + */ + u16 capi20_get_serial(u32 contr, u8 *serial) { struct capi_ctr *card; @@ -721,6 +858,16 @@ u16 capi20_get_serial(u32 contr, u8 *serial) EXPORT_SYMBOL(capi20_get_serial); +/** + * capi20_get_profile() - CAPI 2.0 operation CAPI_GET_PROFILE + * @contr: controller number. + * @profp: result structure. + * + * Retrieve capability information for the specified ISDN controller + * or (for @contr == 0) the number of installed controllers. + * Return value: CAPI result code + */ + u16 capi20_get_profile(u32 contr, struct capi_profile *profp) { struct capi_ctr *card; @@ -903,6 +1050,15 @@ static int old_capi_manufacturer(unsigned int cmd, void __user *data) } #endif +/** + * capi20_manufacturer() - CAPI 2.0 operation CAPI_MANUFACTURER + * @cmd: command. + * @data: parameter. + * + * Perform manufacturer specific command. + * Return value: CAPI result code + */ + int capi20_manufacturer(unsigned int cmd, void __user *data) { struct capi_ctr *card; @@ -981,6 +1137,21 @@ int capi20_manufacturer(unsigned int cmd, void __user *data) EXPORT_SYMBOL(capi20_manufacturer); /* temporary hack */ + +/** + * capi20_set_callback() - set CAPI application notification callback function + * @ap: CAPI application descriptor structure. + * @callback: callback function (NULL to remove). + * + * If not NULL, the callback function will be called to notify the + * application of the addition or removal of a controller. + * The first argument (cmd) will tell whether the controller was added + * (KCI_CONTRUP) or removed (KCI_CONTRDOWN). + * The second argument (contr) will be the controller number. + * For cmd==KCI_CONTRUP the third argument (data) will be a pointer to the + * new controller's capability profile structure. + */ + void capi20_set_callback(struct capi20_appl *ap, void (*callback) (unsigned int cmd, __u32 contr, void *data)) { From 2296e5a0136f7ba64c99f3a48a55a687aa9abcc8 Mon Sep 17 00:00:00 2001 From: Karsten Keil Date: Thu, 23 Apr 2009 02:24:21 +0000 Subject: [PATCH 16/40] Add reference to CAPI 2.0 standard Move the entry about CAPI 2.0 to the beginning and add a URL. Incorporate changes suggested by Randy Dunlap, thanks for proofreading. Signed-off-by: Karsten Keil Signed-off-by: David S. Miller --- Documentation/isdn/INTERFACE.CAPI | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/Documentation/isdn/INTERFACE.CAPI b/Documentation/isdn/INTERFACE.CAPI index 8947ffcda16e..786d619b36e5 100644 --- a/Documentation/isdn/INTERFACE.CAPI +++ b/Documentation/isdn/INTERFACE.CAPI @@ -3,6 +3,11 @@ Kernel CAPI Interface to Hardware Drivers 1. Overview +From the CAPI 2.0 specification: +COMMON-ISDN-API (CAPI) is an application programming interface standard used +to access ISDN equipment connected to basic rate interfaces (BRI) and primary +rate interfaces (PRI). + Kernel CAPI operates as a dispatching layer between CAPI applications and CAPI hardware drivers. Hardware drivers register ISDN devices (controllers, in CAPI lingo) with Kernel CAPI to indicate their readiness to provide their service @@ -12,6 +17,9 @@ application registration to an available device, forwarding it to the corresponding hardware driver. Kernel CAPI then forwards CAPI messages in both directions between the application and the hardware driver. +Format and semantics of CAPI messages are specified in the CAPI 2.0 standard. +This standard is freely available from http://www.capi.org. + 2. Driver and Device Registration @@ -53,12 +61,10 @@ open() operation on regular files or character devices. After a successful return from register_appl(), CAPI messages from the application may be passed to the driver for the device via calls to the send_message() callback function. The CAPI message to send is stored in the -data portion of a skb. Conversely, the driver may call Kernel CAPI's +data portion of an skb. Conversely, the driver may call Kernel CAPI's capi_ctr_handle_message() function to pass a received CAPI message to Kernel CAPI for forwarding to an application, specifying its ApplID. -Format and semantics of CAPI messages are specified in the CAPI 2.0 standard. - Deregistration requests (CAPI operation CAPI_RELEASE) from applications are forwarded as calls to the release_appl() callback function, passing the same ApplID as with register_appl(). After return from release_appl(), no CAPI @@ -75,9 +81,9 @@ the following non-private fields, all to be set by the driver before calling register_capi_driver(): char name[32] - the name of the driver, as a zero terminated ASCII string + the name of the driver, as a zero-terminated ASCII string char revision[32] - the revision number of the driver, as a zero terminated ASCII string + the revision number of the driver, as a zero-terminated ASCII string int (*add_card)(struct capi_driver *driver, capicardparams *data) a callback function pointer (may be NULL) @@ -100,10 +106,10 @@ void *driverdata an opaque pointer to driver specific data, not touched by Kernel CAPI char name[32] - the name of the controller, as a zero terminated ASCII string + the name of the controller, as a zero-terminated ASCII string char *driver_name - the name of the driver, as a zero terminated ASCII string + the name of the driver, as a zero-terminated ASCII string int (*load_firmware)(struct capi_ctr *ctrlr, capiloaddata *ldata) (optional) pointer to a callback function for sending firmware and From c9503e0fe052020e0294cd07d0ecd982eb7c9177 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Mon, 27 Apr 2009 05:42:24 -0700 Subject: [PATCH 17/40] ipv4: Limit size of route cache hash table Right now we have no upper limit on the size of the route cache hash table. On a 128GB POWER6 box it ends up as 32MB: IP route cache hash table entries: 4194304 (order: 9, 33554432 bytes) It would be nice to cap this for memory consumption reasons, but a massive hashtable also causes a significant spike when measuring OS jitter. With a 32MB hashtable and 4 million entries, rt_worker_func is taking 5 ms to complete. On another system with more memory it's taking 14 ms. Even though rt_worker_func does call cond_sched() to limit its impact, in an HPC environment we want to keep all sources of OS jitter to a minimum. With the patch applied we limit the number of entries to 512k which can still be overriden by using the rt_entries boot option: IP route cache hash table entries: 524288 (order: 6, 4194304 bytes) With this patch rt_worker_func now takes 0.460 ms on the same system. Signed-off-by: Anton Blanchard Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/route.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index c40debe51b38..c4c60e9f068a 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -3397,7 +3397,7 @@ int __init ip_rt_init(void) 0, &rt_hash_log, &rt_hash_mask, - 0); + rhash_entries ? 0 : 512 * 1024); memset(rt_hash_table, 0, (rt_hash_mask + 1) * sizeof(struct rt_hash_bucket)); rt_hash_lock_init(); From 37b607c5ac3b7c92a6a3624bb29f1cdcdcf7044a Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Mon, 27 Apr 2009 05:45:54 -0700 Subject: [PATCH 18/40] net: Fix typo in net_device_ops description. Signed-off-by: Mike Rapoport Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 453be9a674c0..5a96a1a406e9 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -500,7 +500,7 @@ struct netdev_queue { * * int (*ndo_set_mac_address)(struct net_device *dev, void *addr); * This function is called when the Media Access Control address - * needs to be changed. If not this interface is not defined, the + * needs to be changed. If this interface is not defined, the * mac address can not be changed. * * int (*ndo_validate_addr)(struct net_device *dev); From 802b352f2934f799ec2e159f61db6506094a936e Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 27 Apr 2009 21:24:28 -0700 Subject: [PATCH 19/40] ecryptfs: fix printk format warning fs/ecryptfs/inode.c:670: warning: format '%d' expects type 'int', but argument 3 has type 'size_t' Signed-off-by: Randy Dunlap Signed-off-by: Tyler Hicks Cc: Dustin Kirkland Signed-off-by: Andrew Morton --- fs/ecryptfs/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index 93bc0f8174a7..1940edd08307 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -667,7 +667,7 @@ ecryptfs_readlink(struct dentry *dentry, char __user *buf, int bufsiz) lower_buf = kmalloc(lower_bufsiz, GFP_KERNEL); if (lower_buf == NULL) { printk(KERN_ERR "%s: Out of memory whilst attempting to " - "kmalloc [%d] bytes\n", __func__, lower_bufsiz); + "kmalloc [%zd] bytes\n", __func__, lower_bufsiz); rc = -ENOMEM; goto out; } From ac20100df7a7a042423dcb8847f42d9f6ddb8d00 Mon Sep 17 00:00:00 2001 From: Tyler Hicks Date: Mon, 27 Apr 2009 13:31:12 -0500 Subject: [PATCH 20/40] eCryptfs: Fix min function comparison warning This warning shows up on 64 bit builds: fs/ecryptfs/inode.c:693: warning: comparison of distinct pointer types lacks a cast Signed-off-by: Tyler Hicks --- fs/ecryptfs/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index 1940edd08307..2f0945d63297 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -690,7 +690,7 @@ ecryptfs_readlink(struct dentry *dentry, char __user *buf, int bufsiz) } /* Check for bufsiz <= 0 done in sys_readlinkat() */ rc = copy_to_user(buf, plaintext_name, - min((unsigned) bufsiz, plaintext_name_size)); + min((size_t) bufsiz, plaintext_name_size)); if (rc) rc = -EFAULT; else From bf368e4e70cd4e0f880923c44e95a4273d725ab4 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 28 Apr 2009 02:24:21 -0700 Subject: [PATCH 21/40] net: Avoid extra wakeups of threads blocked in wait_for_packet() In 2.6.25 we added UDP mem accounting. This unfortunatly added a penalty when a frame is transmitted, since we have at TX completion time to call sock_wfree() to perform necessary memory accounting. This calls sock_def_write_space() and utimately scheduler if any thread is waiting on the socket. Thread(s) waiting for an incoming frame was scheduled, then had to sleep again as event was meaningless. (All threads waiting on a socket are using same sk_sleep anchor) This adds lot of extra wakeups and increases latencies, as noted by Christoph Lameter, and slows down softirq handler. Reference : http://marc.info/?l=linux-netdev&m=124060437012283&w=2 Fortunatly, Davide Libenzi recently added concept of keyed wakeups into kernel, and particularly for sockets (see commit 37e5540b3c9d838eb20f2ca8ea2eb8072271e403 epoll keyed wakeups: make sockets use keyed wakeups) Davide goal was to optimize epoll, but this new wakeup infrastructure can help non epoll users as well, if they care to setup an appropriate handler. This patch introduces new DEFINE_WAIT_FUNC() helper and uses it in wait_for_packet(), so that only relevant event can wakeup a thread blocked in this function. Trace of function calls from bnx2 TX completion bnx2_poll_work() is : __kfree_skb() skb_release_head_state() sock_wfree() sock_def_write_space() __wake_up_sync_key() __wake_up_common() receiver_wake_function() : Stops here since thread is waiting for an INPUT Reported-by: Christoph Lameter Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/wait.h | 6 ++++-- net/core/datagram.c | 14 +++++++++++++- 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/include/linux/wait.h b/include/linux/wait.h index 5d631c17eaee..bc024632f365 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -440,13 +440,15 @@ void abort_exclusive_wait(wait_queue_head_t *q, wait_queue_t *wait, int autoremove_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key); int wake_bit_function(wait_queue_t *wait, unsigned mode, int sync, void *key); -#define DEFINE_WAIT(name) \ +#define DEFINE_WAIT_FUNC(name, function) \ wait_queue_t name = { \ .private = current, \ - .func = autoremove_wake_function, \ + .func = function, \ .task_list = LIST_HEAD_INIT((name).task_list), \ } +#define DEFINE_WAIT(name) DEFINE_WAIT_FUNC(name, autoremove_wake_function) + #define DEFINE_WAIT_BIT(name, word, bit) \ struct wait_bit_queue name = { \ .key = __WAIT_BIT_KEY_INITIALIZER(word, bit), \ diff --git a/net/core/datagram.c b/net/core/datagram.c index d0de644b378d..b01a76abe1d2 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -64,13 +64,25 @@ static inline int connection_based(struct sock *sk) return sk->sk_type == SOCK_SEQPACKET || sk->sk_type == SOCK_STREAM; } +static int receiver_wake_function(wait_queue_t *wait, unsigned mode, int sync, + void *key) +{ + unsigned long bits = (unsigned long)key; + + /* + * Avoid a wakeup if event not interesting for us + */ + if (bits && !(bits & (POLLIN | POLLERR))) + return 0; + return autoremove_wake_function(wait, mode, sync, key); +} /* * Wait for a packet.. */ static int wait_for_packet(struct sock *sk, int *err, long *timeo_p) { int error; - DEFINE_WAIT(wait); + DEFINE_WAIT_FUNC(wait, receiver_wake_function); prepare_to_wait_exclusive(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); From f3784d834c71689336fa272df420b45345cb6b84 Mon Sep 17 00:00:00 2001 From: Roger Quadros Date: Thu, 23 Apr 2009 14:50:54 +0300 Subject: [PATCH 22/40] Bluetooth: Ensure that HCI sysfs add/del is preempt safe Use a different work_struct variables for add_conn() and del_conn() and use single work queue instead of two for adding and deleting connections. It eliminates the following error on a preemptible kernel: [ 204.358032] Unable to handle kernel NULL pointer dereference at virtual address 0000000c [ 204.370697] pgd = c0004000 [ 204.373443] [0000000c] *pgd=00000000 [ 204.378601] Internal error: Oops: 17 [#1] PREEMPT [ 204.383361] Modules linked in: vfat fat rfcomm sco l2cap sd_mod scsi_mod iphb pvr2d drm omaplfb ps [ 204.438537] CPU: 0 Not tainted (2.6.28-maemo2 #1) [ 204.443664] PC is at klist_put+0x2c/0xb4 [ 204.447601] LR is at klist_put+0x18/0xb4 [ 204.451568] pc : [] lr : [] psr: a0000113 [ 204.451568] sp : cf1b3f10 ip : cf1b3f10 fp : cf1b3f2c [ 204.463104] r10: 00000000 r9 : 00000000 r8 : bf08029c [ 204.468353] r7 : c7869200 r6 : cfbe2690 r5 : c78692c8 r4 : 00000001 [ 204.474945] r3 : 00000001 r2 : cf1b2000 r1 : 00000001 r0 : 00000000 [ 204.481506] Flags: NzCv IRQs on FIQs on Mode SVC_32 ISA ARM Segment kernel [ 204.488861] Control: 10c5387d Table: 887fc018 DAC: 00000017 [ 204.494628] Process btdelconn (pid: 515, stack limit = 0xcf1b22e0) Signed-off-by: Roger Quadros Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci_core.h | 3 ++- net/bluetooth/hci_sysfs.c | 37 ++++++++++++++------------------ 2 files changed, 18 insertions(+), 22 deletions(-) diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 01f9316b4c23..1224bba24bdd 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -180,7 +180,8 @@ struct hci_conn { struct timer_list disc_timer; struct timer_list idle_timer; - struct work_struct work; + struct work_struct work_add; + struct work_struct work_del; struct device dev; diff --git a/net/bluetooth/hci_sysfs.c b/net/bluetooth/hci_sysfs.c index ed82796d4a0f..b7c51082ddeb 100644 --- a/net/bluetooth/hci_sysfs.c +++ b/net/bluetooth/hci_sysfs.c @@ -9,8 +9,7 @@ struct class *bt_class = NULL; EXPORT_SYMBOL_GPL(bt_class); -static struct workqueue_struct *btaddconn; -static struct workqueue_struct *btdelconn; +static struct workqueue_struct *bluetooth; static inline char *link_typetostr(int type) { @@ -88,9 +87,10 @@ static struct device_type bt_link = { static void add_conn(struct work_struct *work) { - struct hci_conn *conn = container_of(work, struct hci_conn, work); + struct hci_conn *conn = container_of(work, struct hci_conn, work_add); - flush_workqueue(btdelconn); + /* ensure previous add/del is complete */ + flush_workqueue(bluetooth); if (device_add(&conn->dev) < 0) { BT_ERR("Failed to register connection device"); @@ -114,9 +114,9 @@ void hci_conn_add_sysfs(struct hci_conn *conn) device_initialize(&conn->dev); - INIT_WORK(&conn->work, add_conn); + INIT_WORK(&conn->work_add, add_conn); - queue_work(btaddconn, &conn->work); + queue_work(bluetooth, &conn->work_add); } /* @@ -131,9 +131,12 @@ static int __match_tty(struct device *dev, void *data) static void del_conn(struct work_struct *work) { - struct hci_conn *conn = container_of(work, struct hci_conn, work); + struct hci_conn *conn = container_of(work, struct hci_conn, work_del); struct hci_dev *hdev = conn->hdev; + /* ensure previous add/del is complete */ + flush_workqueue(bluetooth); + while (1) { struct device *dev; @@ -156,9 +159,9 @@ void hci_conn_del_sysfs(struct hci_conn *conn) if (!device_is_registered(&conn->dev)) return; - INIT_WORK(&conn->work, del_conn); + INIT_WORK(&conn->work_del, del_conn); - queue_work(btdelconn, &conn->work); + queue_work(bluetooth, &conn->work_del); } static inline char *host_typetostr(int type) @@ -435,20 +438,13 @@ void hci_unregister_sysfs(struct hci_dev *hdev) int __init bt_sysfs_init(void) { - btaddconn = create_singlethread_workqueue("btaddconn"); - if (!btaddconn) + bluetooth = create_singlethread_workqueue("bluetooth"); + if (!bluetooth) return -ENOMEM; - btdelconn = create_singlethread_workqueue("btdelconn"); - if (!btdelconn) { - destroy_workqueue(btaddconn); - return -ENOMEM; - } - bt_class = class_create(THIS_MODULE, "bluetooth"); if (IS_ERR(bt_class)) { - destroy_workqueue(btdelconn); - destroy_workqueue(btaddconn); + destroy_workqueue(bluetooth); return PTR_ERR(bt_class); } @@ -457,8 +453,7 @@ int __init bt_sysfs_init(void) void bt_sysfs_cleanup(void) { - destroy_workqueue(btaddconn); - destroy_workqueue(btdelconn); + destroy_workqueue(bluetooth); class_destroy(bt_class); } From 052b30b0a8eec8db5b18ad49effdf2a9ba4c1e1a Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Sun, 26 Apr 2009 20:01:22 +0200 Subject: [PATCH 23/40] Bluetooth: Add different pairing timeout for Legacy Pairing The Bluetooth stack uses a reference counting for all established ACL links and if no user (L2CAP connection) is present, the link will be terminated to save power. The problem part is the dedicated pairing when using Legacy Pairing (Bluetooth 2.0 and before). At that point no user is present and pairing attempts will be disconnected within 10 seconds or less. In previous kernel version this was not a problem since the disconnect timeout wasn't triggered on incoming connections for the first time. However this caused issues with broken host stacks that kept the connections around after dedicated pairing. When the support for Simple Pairing got added, the link establishment procedure needed to be changed and now causes issues when using Legacy Pairing When using Simple Pairing it is possible to do a proper reference counting of ACL link users. With Legacy Pairing this is not possible since the specification is unclear in some areas and too many broken Bluetooth devices have already been deployed. So instead of trying to deal with all the broken devices, a special pairing timeout will be introduced that increases the timeout to 60 seconds when pairing is triggered. If a broken devices now puts the stack into an unforeseen state, the worst that happens is the disconnect timeout triggers after 120 seconds instead of 4 seconds. This allows successful pairings with legacy and broken devices now. Based on a report by Johan Hedberg Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci.h | 1 + include/net/bluetooth/hci_core.h | 5 +++-- net/bluetooth/hci_conn.c | 1 + net/bluetooth/hci_event.c | 36 +++++++++++++++++++++++++++++++- 4 files changed, 40 insertions(+), 3 deletions(-) diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index f69f015bbcc0..ed3aea1605e8 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -101,6 +101,7 @@ enum { /* HCI timeouts */ #define HCI_CONNECT_TIMEOUT (40000) /* 40 seconds */ #define HCI_DISCONN_TIMEOUT (2000) /* 2 seconds */ +#define HCI_PAIRING_TIMEOUT (60000) /* 60 seconds */ #define HCI_IDLE_TIMEOUT (6000) /* 6 seconds */ #define HCI_INIT_TIMEOUT (10000) /* 10 seconds */ diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 1224bba24bdd..be5bd713d2c9 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -171,6 +171,7 @@ struct hci_conn { __u8 auth_type; __u8 sec_level; __u8 power_save; + __u16 disc_timeout; unsigned long pend; unsigned int sent; @@ -349,9 +350,9 @@ static inline void hci_conn_put(struct hci_conn *conn) if (conn->type == ACL_LINK) { del_timer(&conn->idle_timer); if (conn->state == BT_CONNECTED) { - timeo = msecs_to_jiffies(HCI_DISCONN_TIMEOUT); + timeo = msecs_to_jiffies(conn->disc_timeout); if (!conn->out) - timeo *= 5; + timeo *= 2; } else timeo = msecs_to_jiffies(10); } else diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 1181db08d9de..75ebbe2221a3 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -215,6 +215,7 @@ struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst) conn->state = BT_OPEN; conn->power_save = 1; + conn->disc_timeout = HCI_DISCONN_TIMEOUT; switch (type) { case ACL_LINK: diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 15f40ea8d544..4e7cb88e5da9 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -883,6 +883,7 @@ static inline void hci_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *s if (conn->type == ACL_LINK) { conn->state = BT_CONFIG; hci_conn_hold(conn); + conn->disc_timeout = HCI_DISCONN_TIMEOUT; } else conn->state = BT_CONNECTED; @@ -1063,9 +1064,14 @@ static inline void hci_auth_complete_evt(struct hci_dev *hdev, struct sk_buff *s hci_proto_connect_cfm(conn, ev->status); hci_conn_put(conn); } - } else + } else { hci_auth_cfm(conn, ev->status); + hci_conn_hold(conn); + conn->disc_timeout = HCI_DISCONN_TIMEOUT; + hci_conn_put(conn); + } + if (test_bit(HCI_CONN_ENCRYPT_PEND, &conn->pend)) { if (!ev->status) { struct hci_cp_set_conn_encrypt cp; @@ -1479,7 +1485,21 @@ static inline void hci_mode_change_evt(struct hci_dev *hdev, struct sk_buff *skb static inline void hci_pin_code_request_evt(struct hci_dev *hdev, struct sk_buff *skb) { + struct hci_ev_pin_code_req *ev = (void *) skb->data; + struct hci_conn *conn; + BT_DBG("%s", hdev->name); + + hci_dev_lock(hdev); + + conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &ev->bdaddr); + if (conn) { + hci_conn_hold(conn); + conn->disc_timeout = HCI_PAIRING_TIMEOUT; + hci_conn_put(conn); + } + + hci_dev_unlock(hdev); } static inline void hci_link_key_request_evt(struct hci_dev *hdev, struct sk_buff *skb) @@ -1489,7 +1509,21 @@ static inline void hci_link_key_request_evt(struct hci_dev *hdev, struct sk_buff static inline void hci_link_key_notify_evt(struct hci_dev *hdev, struct sk_buff *skb) { + struct hci_ev_link_key_notify *ev = (void *) skb->data; + struct hci_conn *conn; + BT_DBG("%s", hdev->name); + + hci_dev_lock(hdev); + + conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &ev->bdaddr); + if (conn) { + hci_conn_hold(conn); + conn->disc_timeout = HCI_DISCONN_TIMEOUT; + hci_conn_put(conn); + } + + hci_dev_unlock(hdev); } static inline void hci_clock_offset_evt(struct hci_dev *hdev, struct sk_buff *skb) From 3fdca1e1370ffe89980927cdef0583bebcd8caaf Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Tue, 28 Apr 2009 09:04:55 -0700 Subject: [PATCH 24/40] Bluetooth: Fix connection establishment with low security requirement The Bluetooth 2.1 specification introduced four different security modes that can be mapped using Legacy Pairing and Simple Pairing. With the usage of Simple Pairing it is required that all connections (except the ones for SDP) are encrypted. So even the low security requirement mandates an encrypted connection when using Simple Pairing. When using Legacy Pairing (for Bluetooth 2.0 devices and older) this is not required since it causes interoperability issues. To support this properly the low security requirement translates into different host controller transactions depending if Simple Pairing is supported or not. However in case of Simple Pairing the command to switch on encryption after a successful authentication is not triggered for the low security mode. This patch fixes this and actually makes the logic to differentiate between Simple Pairing and Legacy Pairing a lot simpler. Based on a report by Ville Tervo Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_conn.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 75ebbe2221a3..375f4b4f7f79 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -425,12 +425,9 @@ int hci_conn_security(struct hci_conn *conn, __u8 sec_level, __u8 auth_type) if (sec_level == BT_SECURITY_SDP) return 1; - if (sec_level == BT_SECURITY_LOW) { - if (conn->ssp_mode > 0 && conn->hdev->ssp_mode > 0) - return hci_conn_auth(conn, sec_level, auth_type); - else - return 1; - } + if (sec_level == BT_SECURITY_LOW && + (!conn->ssp_mode || !conn->hdev->ssp_mode)) + return 1; if (conn->link_mode & HCI_LM_ENCRYPT) return hci_conn_auth(conn, sec_level, auth_type); From 942e4a2bd680c606af0211e64eb216be2e19bf61 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Tue, 28 Apr 2009 22:36:33 -0700 Subject: [PATCH 25/40] netfilter: revised locking for x_tables The x_tables are organized with a table structure and a per-cpu copies of the counters and rules. On older kernels there was a reader/writer lock per table which was a performance bottleneck. In 2.6.30-rc, this was converted to use RCU and the counters/rules which solved the performance problems for do_table but made replacing rules much slower because of the necessary RCU grace period. This version uses a per-cpu set of spinlocks and counters to allow to table processing to proceed without the cache thrashing of a global reader lock and keeps the same performance for table updates. Signed-off-by: Stephen Hemminger Acked-by: Linus Torvalds Signed-off-by: David S. Miller --- include/linux/netfilter/x_tables.h | 73 +++++++++++++++-- net/ipv4/netfilter/arp_tables.c | 125 +++++++++------------------- net/ipv4/netfilter/ip_tables.c | 126 ++++++++--------------------- net/ipv6/netfilter/ip6_tables.c | 123 +++++++++------------------- net/netfilter/x_tables.c | 53 ++++++------ 5 files changed, 204 insertions(+), 296 deletions(-) diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 7b1a652066c0..1b2e43502ef7 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -354,9 +354,6 @@ struct xt_table /* What hooks you will enter on */ unsigned int valid_hooks; - /* Lock for the curtain */ - struct mutex lock; - /* Man behind the curtain... */ struct xt_table_info *private; @@ -434,8 +431,74 @@ extern void xt_proto_fini(struct net *net, u_int8_t af); extern struct xt_table_info *xt_alloc_table_info(unsigned int size); extern void xt_free_table_info(struct xt_table_info *info); -extern void xt_table_entry_swap_rcu(struct xt_table_info *old, - struct xt_table_info *new); + +/* + * Per-CPU spinlock associated with per-cpu table entries, and + * with a counter for the "reading" side that allows a recursive + * reader to avoid taking the lock and deadlocking. + * + * "reading" is used by ip/arp/ip6 tables rule processing which runs per-cpu. + * It needs to ensure that the rules are not being changed while the packet + * is being processed. In some cases, the read lock will be acquired + * twice on the same CPU; this is okay because of the count. + * + * "writing" is used when reading counters. + * During replace any readers that are using the old tables have to complete + * before freeing the old table. This is handled by the write locking + * necessary for reading the counters. + */ +struct xt_info_lock { + spinlock_t lock; + unsigned char readers; +}; +DECLARE_PER_CPU(struct xt_info_lock, xt_info_locks); + +/* + * Note: we need to ensure that preemption is disabled before acquiring + * the per-cpu-variable, so we do it as a two step process rather than + * using "spin_lock_bh()". + * + * We _also_ need to disable bottom half processing before updating our + * nesting count, to make sure that the only kind of re-entrancy is this + * code being called by itself: since the count+lock is not an atomic + * operation, we can allow no races. + * + * _Only_ that special combination of being per-cpu and never getting + * re-entered asynchronously means that the count is safe. + */ +static inline void xt_info_rdlock_bh(void) +{ + struct xt_info_lock *lock; + + local_bh_disable(); + lock = &__get_cpu_var(xt_info_locks); + if (!lock->readers++) + spin_lock(&lock->lock); +} + +static inline void xt_info_rdunlock_bh(void) +{ + struct xt_info_lock *lock = &__get_cpu_var(xt_info_locks); + + if (!--lock->readers) + spin_unlock(&lock->lock); + local_bh_enable(); +} + +/* + * The "writer" side needs to get exclusive access to the lock, + * regardless of readers. This must be called with bottom half + * processing (and thus also preemption) disabled. + */ +static inline void xt_info_wrlock(unsigned int cpu) +{ + spin_lock(&per_cpu(xt_info_locks, cpu).lock); +} + +static inline void xt_info_wrunlock(unsigned int cpu) +{ + spin_unlock(&per_cpu(xt_info_locks, cpu).lock); +} /* * This helper is performance critical and must be inlined diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 5ba533d234db..831fe1879dc0 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -253,9 +253,9 @@ unsigned int arpt_do_table(struct sk_buff *skb, indev = in ? in->name : nulldevname; outdev = out ? out->name : nulldevname; - rcu_read_lock_bh(); - private = rcu_dereference(table->private); - table_base = rcu_dereference(private->entries[smp_processor_id()]); + xt_info_rdlock_bh(); + private = table->private; + table_base = private->entries[smp_processor_id()]; e = get_entry(table_base, private->hook_entry[hook]); back = get_entry(table_base, private->underflow[hook]); @@ -273,6 +273,7 @@ unsigned int arpt_do_table(struct sk_buff *skb, hdr_len = sizeof(*arp) + (2 * sizeof(struct in_addr)) + (2 * skb->dev->addr_len); + ADD_COUNTER(e->counters, hdr_len, 1); t = arpt_get_target(e); @@ -328,8 +329,7 @@ unsigned int arpt_do_table(struct sk_buff *skb, e = (void *)e + e->next_offset; } } while (!hotdrop); - - rcu_read_unlock_bh(); + xt_info_rdunlock_bh(); if (hotdrop) return NF_DROP; @@ -711,9 +711,12 @@ static void get_counters(const struct xt_table_info *t, /* Instead of clearing (by a previous call to memset()) * the counters and using adds, we set the counters * with data used by 'current' CPU - * We dont care about preemption here. + * + * Bottom half has to be disabled to prevent deadlock + * if new softirq were to run and call ipt_do_table */ - curcpu = raw_smp_processor_id(); + local_bh_disable(); + curcpu = smp_processor_id(); i = 0; ARPT_ENTRY_ITERATE(t->entries[curcpu], @@ -726,73 +729,22 @@ static void get_counters(const struct xt_table_info *t, if (cpu == curcpu) continue; i = 0; + xt_info_wrlock(cpu); ARPT_ENTRY_ITERATE(t->entries[cpu], t->size, add_entry_to_counter, counters, &i); + xt_info_wrunlock(cpu); } -} - - -/* We're lazy, and add to the first CPU; overflow works its fey magic - * and everything is OK. */ -static int -add_counter_to_entry(struct arpt_entry *e, - const struct xt_counters addme[], - unsigned int *i) -{ - ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt); - - (*i)++; - return 0; -} - -/* Take values from counters and add them back onto the current cpu */ -static void put_counters(struct xt_table_info *t, - const struct xt_counters counters[]) -{ - unsigned int i, cpu; - - local_bh_disable(); - cpu = smp_processor_id(); - i = 0; - ARPT_ENTRY_ITERATE(t->entries[cpu], - t->size, - add_counter_to_entry, - counters, - &i); local_bh_enable(); } -static inline int -zero_entry_counter(struct arpt_entry *e, void *arg) -{ - e->counters.bcnt = 0; - e->counters.pcnt = 0; - return 0; -} - -static void -clone_counters(struct xt_table_info *newinfo, const struct xt_table_info *info) -{ - unsigned int cpu; - const void *loc_cpu_entry = info->entries[raw_smp_processor_id()]; - - memcpy(newinfo, info, offsetof(struct xt_table_info, entries)); - for_each_possible_cpu(cpu) { - memcpy(newinfo->entries[cpu], loc_cpu_entry, info->size); - ARPT_ENTRY_ITERATE(newinfo->entries[cpu], newinfo->size, - zero_entry_counter, NULL); - } -} - static struct xt_counters *alloc_counters(struct xt_table *table) { unsigned int countersize; struct xt_counters *counters; struct xt_table_info *private = table->private; - struct xt_table_info *info; /* We need atomic snapshot of counters: rest doesn't change * (other than comefrom, which userspace doesn't care @@ -802,30 +754,11 @@ static struct xt_counters *alloc_counters(struct xt_table *table) counters = vmalloc_node(countersize, numa_node_id()); if (counters == NULL) - goto nomem; + return ERR_PTR(-ENOMEM); - info = xt_alloc_table_info(private->size); - if (!info) - goto free_counters; - - clone_counters(info, private); - - mutex_lock(&table->lock); - xt_table_entry_swap_rcu(private, info); - synchronize_net(); /* Wait until smoke has cleared */ - - get_counters(info, counters); - put_counters(private, counters); - mutex_unlock(&table->lock); - - xt_free_table_info(info); + get_counters(private, counters); return counters; - - free_counters: - vfree(counters); - nomem: - return ERR_PTR(-ENOMEM); } static int copy_entries_to_user(unsigned int total_size, @@ -1094,8 +1027,9 @@ static int __do_replace(struct net *net, const char *name, (newinfo->number <= oldinfo->initial_entries)) module_put(t->me); - /* Get the old counters. */ + /* Get the old counters, and synchronize with replace */ get_counters(oldinfo, counters); + /* Decrease module usage counts and free resource */ loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()]; ARPT_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry, @@ -1165,10 +1099,23 @@ static int do_replace(struct net *net, void __user *user, unsigned int len) return ret; } +/* We're lazy, and add to the first CPU; overflow works its fey magic + * and everything is OK. */ +static int +add_counter_to_entry(struct arpt_entry *e, + const struct xt_counters addme[], + unsigned int *i) +{ + ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt); + + (*i)++; + return 0; +} + static int do_add_counters(struct net *net, void __user *user, unsigned int len, int compat) { - unsigned int i; + unsigned int i, curcpu; struct xt_counters_info tmp; struct xt_counters *paddc; unsigned int num_counters; @@ -1224,26 +1171,26 @@ static int do_add_counters(struct net *net, void __user *user, unsigned int len, goto free; } - mutex_lock(&t->lock); + local_bh_disable(); private = t->private; if (private->number != num_counters) { ret = -EINVAL; goto unlock_up_free; } - preempt_disable(); i = 0; /* Choose the copy that is on our node */ - loc_cpu_entry = private->entries[smp_processor_id()]; + curcpu = smp_processor_id(); + loc_cpu_entry = private->entries[curcpu]; + xt_info_wrlock(curcpu); ARPT_ENTRY_ITERATE(loc_cpu_entry, private->size, add_counter_to_entry, paddc, &i); - preempt_enable(); + xt_info_wrunlock(curcpu); unlock_up_free: - mutex_unlock(&t->lock); - + local_bh_enable(); xt_table_unlock(t); module_put(t->me); free: diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 810c0b62c7d4..2ec8d7290c40 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -338,10 +338,9 @@ ipt_do_table(struct sk_buff *skb, tgpar.hooknum = hook; IP_NF_ASSERT(table->valid_hooks & (1 << hook)); - - rcu_read_lock_bh(); - private = rcu_dereference(table->private); - table_base = rcu_dereference(private->entries[smp_processor_id()]); + xt_info_rdlock_bh(); + private = table->private; + table_base = private->entries[smp_processor_id()]; e = get_entry(table_base, private->hook_entry[hook]); @@ -436,8 +435,7 @@ ipt_do_table(struct sk_buff *skb, e = (void *)e + e->next_offset; } } while (!hotdrop); - - rcu_read_unlock_bh(); + xt_info_rdunlock_bh(); #ifdef DEBUG_ALLOW_ALL return NF_ACCEPT; @@ -896,10 +894,13 @@ get_counters(const struct xt_table_info *t, /* Instead of clearing (by a previous call to memset()) * the counters and using adds, we set the counters - * with data used by 'current' CPU - * We dont care about preemption here. + * with data used by 'current' CPU. + * + * Bottom half has to be disabled to prevent deadlock + * if new softirq were to run and call ipt_do_table */ - curcpu = raw_smp_processor_id(); + local_bh_disable(); + curcpu = smp_processor_id(); i = 0; IPT_ENTRY_ITERATE(t->entries[curcpu], @@ -912,74 +913,22 @@ get_counters(const struct xt_table_info *t, if (cpu == curcpu) continue; i = 0; + xt_info_wrlock(cpu); IPT_ENTRY_ITERATE(t->entries[cpu], t->size, add_entry_to_counter, counters, &i); + xt_info_wrunlock(cpu); } - -} - -/* We're lazy, and add to the first CPU; overflow works its fey magic - * and everything is OK. */ -static int -add_counter_to_entry(struct ipt_entry *e, - const struct xt_counters addme[], - unsigned int *i) -{ - ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt); - - (*i)++; - return 0; -} - -/* Take values from counters and add them back onto the current cpu */ -static void put_counters(struct xt_table_info *t, - const struct xt_counters counters[]) -{ - unsigned int i, cpu; - - local_bh_disable(); - cpu = smp_processor_id(); - i = 0; - IPT_ENTRY_ITERATE(t->entries[cpu], - t->size, - add_counter_to_entry, - counters, - &i); local_bh_enable(); } - -static inline int -zero_entry_counter(struct ipt_entry *e, void *arg) -{ - e->counters.bcnt = 0; - e->counters.pcnt = 0; - return 0; -} - -static void -clone_counters(struct xt_table_info *newinfo, const struct xt_table_info *info) -{ - unsigned int cpu; - const void *loc_cpu_entry = info->entries[raw_smp_processor_id()]; - - memcpy(newinfo, info, offsetof(struct xt_table_info, entries)); - for_each_possible_cpu(cpu) { - memcpy(newinfo->entries[cpu], loc_cpu_entry, info->size); - IPT_ENTRY_ITERATE(newinfo->entries[cpu], newinfo->size, - zero_entry_counter, NULL); - } -} - static struct xt_counters * alloc_counters(struct xt_table *table) { unsigned int countersize; struct xt_counters *counters; struct xt_table_info *private = table->private; - struct xt_table_info *info; /* We need atomic snapshot of counters: rest doesn't change (other than comefrom, which userspace doesn't care @@ -988,30 +937,11 @@ static struct xt_counters * alloc_counters(struct xt_table *table) counters = vmalloc_node(countersize, numa_node_id()); if (counters == NULL) - goto nomem; + return ERR_PTR(-ENOMEM); - info = xt_alloc_table_info(private->size); - if (!info) - goto free_counters; - - clone_counters(info, private); - - mutex_lock(&table->lock); - xt_table_entry_swap_rcu(private, info); - synchronize_net(); /* Wait until smoke has cleared */ - - get_counters(info, counters); - put_counters(private, counters); - mutex_unlock(&table->lock); - - xt_free_table_info(info); + get_counters(private, counters); return counters; - - free_counters: - vfree(counters); - nomem: - return ERR_PTR(-ENOMEM); } static int @@ -1306,8 +1236,9 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks, (newinfo->number <= oldinfo->initial_entries)) module_put(t->me); - /* Get the old counters. */ + /* Get the old counters, and synchronize with replace */ get_counters(oldinfo, counters); + /* Decrease module usage counts and free resource */ loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()]; IPT_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry, @@ -1377,11 +1308,23 @@ do_replace(struct net *net, void __user *user, unsigned int len) return ret; } +/* We're lazy, and add to the first CPU; overflow works its fey magic + * and everything is OK. */ +static int +add_counter_to_entry(struct ipt_entry *e, + const struct xt_counters addme[], + unsigned int *i) +{ + ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt); + + (*i)++; + return 0; +} static int do_add_counters(struct net *net, void __user *user, unsigned int len, int compat) { - unsigned int i; + unsigned int i, curcpu; struct xt_counters_info tmp; struct xt_counters *paddc; unsigned int num_counters; @@ -1437,25 +1380,26 @@ do_add_counters(struct net *net, void __user *user, unsigned int len, int compat goto free; } - mutex_lock(&t->lock); + local_bh_disable(); private = t->private; if (private->number != num_counters) { ret = -EINVAL; goto unlock_up_free; } - preempt_disable(); i = 0; /* Choose the copy that is on our node */ - loc_cpu_entry = private->entries[raw_smp_processor_id()]; + curcpu = smp_processor_id(); + loc_cpu_entry = private->entries[curcpu]; + xt_info_wrlock(curcpu); IPT_ENTRY_ITERATE(loc_cpu_entry, private->size, add_counter_to_entry, paddc, &i); - preempt_enable(); + xt_info_wrunlock(curcpu); unlock_up_free: - mutex_unlock(&t->lock); + local_bh_enable(); xt_table_unlock(t); module_put(t->me); free: diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 800ae8542471..219e165aea10 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -365,9 +365,9 @@ ip6t_do_table(struct sk_buff *skb, IP_NF_ASSERT(table->valid_hooks & (1 << hook)); - rcu_read_lock_bh(); - private = rcu_dereference(table->private); - table_base = rcu_dereference(private->entries[smp_processor_id()]); + xt_info_rdlock_bh(); + private = table->private; + table_base = private->entries[smp_processor_id()]; e = get_entry(table_base, private->hook_entry[hook]); @@ -466,7 +466,7 @@ ip6t_do_table(struct sk_buff *skb, #ifdef CONFIG_NETFILTER_DEBUG ((struct ip6t_entry *)table_base)->comefrom = NETFILTER_LINK_POISON; #endif - rcu_read_unlock_bh(); + xt_info_rdunlock_bh(); #ifdef DEBUG_ALLOW_ALL return NF_ACCEPT; @@ -926,9 +926,12 @@ get_counters(const struct xt_table_info *t, /* Instead of clearing (by a previous call to memset()) * the counters and using adds, we set the counters * with data used by 'current' CPU - * We dont care about preemption here. + * + * Bottom half has to be disabled to prevent deadlock + * if new softirq were to run and call ipt_do_table */ - curcpu = raw_smp_processor_id(); + local_bh_disable(); + curcpu = smp_processor_id(); i = 0; IP6T_ENTRY_ITERATE(t->entries[curcpu], @@ -941,72 +944,22 @@ get_counters(const struct xt_table_info *t, if (cpu == curcpu) continue; i = 0; + xt_info_wrlock(cpu); IP6T_ENTRY_ITERATE(t->entries[cpu], t->size, add_entry_to_counter, counters, &i); + xt_info_wrunlock(cpu); } -} - -/* We're lazy, and add to the first CPU; overflow works its fey magic - * and everything is OK. */ -static int -add_counter_to_entry(struct ip6t_entry *e, - const struct xt_counters addme[], - unsigned int *i) -{ - ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt); - - (*i)++; - return 0; -} - -/* Take values from counters and add them back onto the current cpu */ -static void put_counters(struct xt_table_info *t, - const struct xt_counters counters[]) -{ - unsigned int i, cpu; - - local_bh_disable(); - cpu = smp_processor_id(); - i = 0; - IP6T_ENTRY_ITERATE(t->entries[cpu], - t->size, - add_counter_to_entry, - counters, - &i); local_bh_enable(); } -static inline int -zero_entry_counter(struct ip6t_entry *e, void *arg) -{ - e->counters.bcnt = 0; - e->counters.pcnt = 0; - return 0; -} - -static void -clone_counters(struct xt_table_info *newinfo, const struct xt_table_info *info) -{ - unsigned int cpu; - const void *loc_cpu_entry = info->entries[raw_smp_processor_id()]; - - memcpy(newinfo, info, offsetof(struct xt_table_info, entries)); - for_each_possible_cpu(cpu) { - memcpy(newinfo->entries[cpu], loc_cpu_entry, info->size); - IP6T_ENTRY_ITERATE(newinfo->entries[cpu], newinfo->size, - zero_entry_counter, NULL); - } -} - static struct xt_counters *alloc_counters(struct xt_table *table) { unsigned int countersize; struct xt_counters *counters; struct xt_table_info *private = table->private; - struct xt_table_info *info; /* We need atomic snapshot of counters: rest doesn't change (other than comefrom, which userspace doesn't care @@ -1015,30 +968,11 @@ static struct xt_counters *alloc_counters(struct xt_table *table) counters = vmalloc_node(countersize, numa_node_id()); if (counters == NULL) - goto nomem; + return ERR_PTR(-ENOMEM); - info = xt_alloc_table_info(private->size); - if (!info) - goto free_counters; - - clone_counters(info, private); - - mutex_lock(&table->lock); - xt_table_entry_swap_rcu(private, info); - synchronize_net(); /* Wait until smoke has cleared */ - - get_counters(info, counters); - put_counters(private, counters); - mutex_unlock(&table->lock); - - xt_free_table_info(info); + get_counters(private, counters); return counters; - - free_counters: - vfree(counters); - nomem: - return ERR_PTR(-ENOMEM); } static int @@ -1334,8 +1268,9 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks, (newinfo->number <= oldinfo->initial_entries)) module_put(t->me); - /* Get the old counters. */ + /* Get the old counters, and synchronize with replace */ get_counters(oldinfo, counters); + /* Decrease module usage counts and free resource */ loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()]; IP6T_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry, @@ -1405,11 +1340,24 @@ do_replace(struct net *net, void __user *user, unsigned int len) return ret; } +/* We're lazy, and add to the first CPU; overflow works its fey magic + * and everything is OK. */ +static int +add_counter_to_entry(struct ip6t_entry *e, + const struct xt_counters addme[], + unsigned int *i) +{ + ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt); + + (*i)++; + return 0; +} + static int do_add_counters(struct net *net, void __user *user, unsigned int len, int compat) { - unsigned int i; + unsigned int i, curcpu; struct xt_counters_info tmp; struct xt_counters *paddc; unsigned int num_counters; @@ -1465,25 +1413,28 @@ do_add_counters(struct net *net, void __user *user, unsigned int len, goto free; } - mutex_lock(&t->lock); + + local_bh_disable(); private = t->private; if (private->number != num_counters) { ret = -EINVAL; goto unlock_up_free; } - preempt_disable(); i = 0; /* Choose the copy that is on our node */ - loc_cpu_entry = private->entries[raw_smp_processor_id()]; + curcpu = smp_processor_id(); + xt_info_wrlock(curcpu); + loc_cpu_entry = private->entries[curcpu]; IP6T_ENTRY_ITERATE(loc_cpu_entry, private->size, add_counter_to_entry, paddc, &i); - preempt_enable(); + xt_info_wrunlock(curcpu); + unlock_up_free: - mutex_unlock(&t->lock); + local_bh_enable(); xt_table_unlock(t); module_put(t->me); free: diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 509a95621f9f..150e5cf62f85 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -625,20 +625,6 @@ void xt_free_table_info(struct xt_table_info *info) } EXPORT_SYMBOL(xt_free_table_info); -void xt_table_entry_swap_rcu(struct xt_table_info *oldinfo, - struct xt_table_info *newinfo) -{ - unsigned int cpu; - - for_each_possible_cpu(cpu) { - void *p = oldinfo->entries[cpu]; - rcu_assign_pointer(oldinfo->entries[cpu], newinfo->entries[cpu]); - newinfo->entries[cpu] = p; - } - -} -EXPORT_SYMBOL_GPL(xt_table_entry_swap_rcu); - /* Find table by name, grabs mutex & ref. Returns ERR_PTR() on error. */ struct xt_table *xt_find_table_lock(struct net *net, u_int8_t af, const char *name) @@ -676,32 +662,43 @@ void xt_compat_unlock(u_int8_t af) EXPORT_SYMBOL_GPL(xt_compat_unlock); #endif +DEFINE_PER_CPU(struct xt_info_lock, xt_info_locks); +EXPORT_PER_CPU_SYMBOL_GPL(xt_info_locks); + + struct xt_table_info * xt_replace_table(struct xt_table *table, unsigned int num_counters, struct xt_table_info *newinfo, int *error) { - struct xt_table_info *oldinfo, *private; + struct xt_table_info *private; /* Do the substitution. */ - mutex_lock(&table->lock); + local_bh_disable(); private = table->private; + /* Check inside lock: is the old number correct? */ if (num_counters != private->number) { duprintf("num_counters != table->private->number (%u/%u)\n", num_counters, private->number); - mutex_unlock(&table->lock); + local_bh_enable(); *error = -EAGAIN; return NULL; } - oldinfo = private; - rcu_assign_pointer(table->private, newinfo); - newinfo->initial_entries = oldinfo->initial_entries; - mutex_unlock(&table->lock); - synchronize_net(); - return oldinfo; + table->private = newinfo; + newinfo->initial_entries = private->initial_entries; + + /* + * Even though table entries have now been swapped, other CPU's + * may still be using the old entries. This is okay, because + * resynchronization happens because of the locking done + * during the get_counters() routine. + */ + local_bh_enable(); + + return private; } EXPORT_SYMBOL_GPL(xt_replace_table); @@ -734,7 +731,6 @@ struct xt_table *xt_register_table(struct net *net, struct xt_table *table, /* Simplifies replace_table code. */ table->private = bootstrap; - mutex_init(&table->lock); if (!xt_replace_table(table, 0, newinfo, &ret)) goto unlock; @@ -1147,7 +1143,14 @@ static struct pernet_operations xt_net_ops = { static int __init xt_init(void) { - int i, rv; + unsigned int i; + int rv; + + for_each_possible_cpu(i) { + struct xt_info_lock *lock = &per_cpu(xt_info_locks, i); + spin_lock_init(&lock->lock); + lock->readers = 0; + } xt = kmalloc(sizeof(struct xt_af) * NFPROTO_NUMPROTO, GFP_KERNEL); if (!xt) From ac7c992cac0c8f276aa8e4a8273204a6db707bb3 Mon Sep 17 00:00:00 2001 From: Thadeu Lima de Souza Cascardo Date: Tue, 28 Apr 2009 22:42:39 -0700 Subject: [PATCH 26/40] e100: do not go D3 in shutdown unless system is powering off After experimenting with kexec with the last merges after 2.6.29, I've had some problems when probing e100. It would not read the eeprom. After some bisects, I realized this has been like that since forever (at least 2.6.18). The problem is that shutdown is doing the same thing that suspend does and puts the device in D3 state. I couldn't find a way to get the device back to a sane state in the probe function. So, based on some similar patches from Rafael J. Wysocki for e1000, e1000e, and ixgbe, I wrote this one for e100. Signed-off-by: Thadeu Lima de Souza Cascardo Acked-by: "Rafael J. Wysocki" Signed-off-by: Jeff Kirsher Signed-off-by: David S. Miller --- drivers/net/e100.c | 30 +++++++++++++++++++++++------- 1 file changed, 23 insertions(+), 7 deletions(-) diff --git a/drivers/net/e100.c b/drivers/net/e100.c index 5c0b457c7868..0f9ee1348552 100644 --- a/drivers/net/e100.c +++ b/drivers/net/e100.c @@ -2728,7 +2728,7 @@ static void __devexit e100_remove(struct pci_dev *pdev) #define E100_82552_SMARTSPEED 0x14 /* SmartSpeed Ctrl register */ #define E100_82552_REV_ANEG 0x0200 /* Reverse auto-negotiation */ #define E100_82552_ANEG_NOW 0x0400 /* Auto-negotiate now */ -static int e100_suspend(struct pci_dev *pdev, pm_message_t state) +static void __e100_shutdown(struct pci_dev *pdev, bool *enable_wake) { struct net_device *netdev = pci_get_drvdata(pdev); struct nic *nic = netdev_priv(netdev); @@ -2749,19 +2749,32 @@ static int e100_suspend(struct pci_dev *pdev, pm_message_t state) E100_82552_SMARTSPEED, smartspeed | E100_82552_REV_ANEG | E100_82552_ANEG_NOW); } - if (pci_enable_wake(pdev, PCI_D3cold, true)) - pci_enable_wake(pdev, PCI_D3hot, true); + *enable_wake = true; } else { - pci_enable_wake(pdev, PCI_D3hot, false); + *enable_wake = false; } pci_disable_device(pdev); - pci_set_power_state(pdev, PCI_D3hot); +} - return 0; +static int __e100_power_off(struct pci_dev *pdev, bool wake) +{ + if (wake) { + return pci_prepare_to_sleep(pdev); + } else { + pci_wake_from_d3(pdev, false); + return pci_set_power_state(pdev, PCI_D3hot); + } } #ifdef CONFIG_PM +static int e100_suspend(struct pci_dev *pdev, pm_message_t state) +{ + bool wake; + __e100_shutdown(pdev, &wake); + return __e100_power_off(pdev, wake); +} + static int e100_resume(struct pci_dev *pdev) { struct net_device *netdev = pci_get_drvdata(pdev); @@ -2792,7 +2805,10 @@ static int e100_resume(struct pci_dev *pdev) static void e100_shutdown(struct pci_dev *pdev) { - e100_suspend(pdev, PMSG_SUSPEND); + bool wake; + __e100_shutdown(pdev, &wake); + if (system_state == SYSTEM_POWER_OFF) + __e100_power_off(pdev, wake); } /* ------------------ PCI Error Recovery infrastructure -------------- */ From 92ab9baee3e075e4d84d9b74f6da5b047539e323 Mon Sep 17 00:00:00 2001 From: Andy Walls Date: Mon, 13 Apr 2009 21:43:31 -0300 Subject: [PATCH 27/40] V4L/DVB (11494): cx18: Send correct input routing value to external audio multiplexers A late v4l2_subdev framework change accidentally sent the audio input routing value to the external multiplexer, instead of the muxer input routing value to the external multiplexer. This change corrects that error. Signed-off-by: Andy Walls Signed-off-by: Mauro Carvalho Chehab --- drivers/media/video/cx18/cx18-audio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/video/cx18/cx18-audio.c b/drivers/media/video/cx18/cx18-audio.c index 1519e91c677a..7a8ad5963de8 100644 --- a/drivers/media/video/cx18/cx18-audio.c +++ b/drivers/media/video/cx18/cx18-audio.c @@ -44,7 +44,7 @@ int cx18_audio_set_io(struct cx18 *cx) /* handle muxer chips */ v4l2_subdev_call(cx->sd_extmux, audio, s_routing, - in->audio_input, 0, 0); + (u32) in->muxer_input, 0, 0); err = cx18_call_hw_err(cx, cx->card->hw_audio_ctrl, audio, s_routing, in->audio_input, 0, 0); From 3964b58a25fdca066011600bf58b38d00e9ae28d Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 14 Apr 2009 13:56:58 -0300 Subject: [PATCH 28/40] V4L/DVB (11494a): cx231xx Kconfig fixes selecting ALSA module breaks if !SND. Just remove select. While here, let's fix the whitespacing at the Kconfig. Signed-off-by: Mauro Carvalho Chehab --- drivers/media/video/cx231xx/Kconfig | 44 ++++++++++++++--------------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/drivers/media/video/cx231xx/Kconfig b/drivers/media/video/cx231xx/Kconfig index 91156546a07a..477d4ab5e9ac 100644 --- a/drivers/media/video/cx231xx/Kconfig +++ b/drivers/media/video/cx231xx/Kconfig @@ -1,12 +1,11 @@ config VIDEO_CX231XX - tristate "Conexant cx231xx USB video capture support" - depends on VIDEO_DEV && I2C && INPUT - select VIDEO_TUNER - select VIDEO_TVEEPROM - select VIDEO_IR - select VIDEOBUF_VMALLOC - select VIDEO_CX25840 - select VIDEO_CX231XX_ALSA + tristate "Conexant cx231xx USB video capture support" + depends on VIDEO_DEV && I2C && INPUT + select VIDEO_TUNER + select VIDEO_TVEEPROM + select VIDEO_IR + select VIDEOBUF_VMALLOC + select VIDEO_CX25840 ---help--- This is a video4linux driver for Conexant 231xx USB based TV cards. @@ -15,21 +14,22 @@ config VIDEO_CX231XX module will be called cx231xx config VIDEO_CX231XX_ALSA - tristate "Conexant Cx231xx ALSA audio module" - depends on VIDEO_CX231XX && SND - select SND_PCM + tristate "Conexant Cx231xx ALSA audio module" + depends on VIDEO_CX231XX && SND + select SND_PCM - ---help--- - This is an ALSA driver for Cx231xx USB based TV cards. + ---help--- + This is an ALSA driver for Cx231xx USB based TV cards. - To compile this driver as a module, choose M here: the - module will be called cx231xx-alsa + To compile this driver as a module, choose M here: the + module will be called cx231xx-alsa config VIDEO_CX231XX_DVB - tristate "DVB/ATSC Support for Cx231xx based TV cards" - depends on VIDEO_CX231XX && DVB_CORE - select VIDEOBUF_DVB - select MEDIA_TUNER_XC5000 if !DVB_FE_CUSTOMISE - ---help--- - This adds support for DVB cards based on the - Conexant cx231xx chips. + tristate "DVB/ATSC Support for Cx231xx based TV cards" + depends on VIDEO_CX231XX && DVB_CORE + select VIDEOBUF_DVB + select MEDIA_TUNER_XC5000 if !DVB_FE_CUSTOMISE + + ---help--- + This adds support for DVB cards based on the + Conexant cx231xx chips. From 5b83cfa98569663b6fa6cda85a1f2fd371a082c7 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 14 Apr 2009 19:50:33 -0300 Subject: [PATCH 29/40] V4L/DVB (11515): drivers/media/video/saa5249.c: fix use-after-free and leak I moved the kfree() down a couple lines. t->vdev is going to be in freed memory so there is no point setting it to NULL. I added a kfree(t) on a Signed-off-by: Mauro Carvalho Chehab --- drivers/media/video/saa5249.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/media/video/saa5249.c b/drivers/media/video/saa5249.c index 48b27fe48087..271d6e931b75 100644 --- a/drivers/media/video/saa5249.c +++ b/drivers/media/video/saa5249.c @@ -598,6 +598,7 @@ static int saa5249_probe(struct i2c_client *client, /* Now create a video4linux device */ t->vdev = video_device_alloc(); if (t->vdev == NULL) { + kfree(t); kfree(client); return -ENOMEM; } @@ -617,9 +618,8 @@ static int saa5249_probe(struct i2c_client *client, /* Register it */ err = video_register_device(t->vdev, VFL_TYPE_VTX, -1); if (err < 0) { - kfree(t); video_device_release(t->vdev); - t->vdev = NULL; + kfree(t); return err; } return 0; From 9401608bb34be14960311dbb91d4a82ee07a03fa Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 14 Apr 2009 19:51:30 -0300 Subject: [PATCH 30/40] V4L/DVB (11516): drivers/media/video/saa5246a.c: fix use-after-free I lowered the kfree(t) down a couple lines and removed the superflous "t->vdev = NULL;" Signed-off-by: Dan Carpenter Signed-off-by: Andrew Morton Signed-off-by: Mauro Carvalho Chehab --- drivers/media/video/saa5246a.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/media/video/saa5246a.c b/drivers/media/video/saa5246a.c index da47b2f05288..155804b061e9 100644 --- a/drivers/media/video/saa5246a.c +++ b/drivers/media/video/saa5246a.c @@ -1092,9 +1092,8 @@ static int saa5246a_probe(struct i2c_client *client, /* Register it */ err = video_register_device(t->vdev, VFL_TYPE_VTX, -1); if (err < 0) { - kfree(t); video_device_release(t->vdev); - t->vdev = NULL; + kfree(t); return err; } return 0; From a357482a1e8fdd39f0a58c33ed2ffd0f1becb825 Mon Sep 17 00:00:00 2001 From: Guennadi Liakhovetski Date: Tue, 21 Apr 2009 04:22:38 -0300 Subject: [PATCH 31/40] V4L/DVB (11561a): move media after i2c Currently drivers/media drivers are linked very early - directly after base, block, misc, and mfd and before ata, scsi, ide, input, firewire, usb, and i2c. This breaks static build of video4linux drivers, that use generic CPU i2c adapter drivers and the v4l2-subdev subsystem, because during video4linux probing the v4l2-subdev core requires a struct i2c_adapter context, which cannot be satisfied before the i2c subsystem is initialised. Moving drivers/media after drivers/i2c fixes this problem. The best way to trigger action is by submitting a patch:-) So, let's see what comes out of it - on the one hand I don't see any reason why media has to be linked this early, and nobody was able to give me one yesterday as this problem has been discussed on linux-media, OTOH, maybe indeed it would be better to move i2c the whole way up above media, but that'd be much bigger of a change, I think. -- To unsubscribe from this list: send the line "unsubscribe linux-media" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Signed-off-by: Guennadi Liakhovetski Acked-by: Jean Delvare Signed-off-by: Mauro Carvalho Chehab --- drivers/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/Makefile b/drivers/Makefile index 2618a6169a13..1266ead6ace0 100644 --- a/drivers/Makefile +++ b/drivers/Makefile @@ -36,7 +36,7 @@ obj-$(CONFIG_FB_INTEL) += video/intelfb/ obj-y += serial/ obj-$(CONFIG_PARPORT) += parport/ -obj-y += base/ block/ misc/ mfd/ media/ +obj-y += base/ block/ misc/ mfd/ obj-$(CONFIG_NUBUS) += nubus/ obj-y += macintosh/ obj-$(CONFIG_IDE) += ide/ @@ -71,7 +71,7 @@ obj-$(CONFIG_GAMEPORT) += input/gameport/ obj-$(CONFIG_INPUT) += input/ obj-$(CONFIG_I2O) += message/ obj-$(CONFIG_RTC_LIB) += rtc/ -obj-y += i2c/ +obj-y += i2c/ media/ obj-$(CONFIG_W1) += w1/ obj-$(CONFIG_POWER_SUPPLY) += power/ obj-$(CONFIG_HWMON) += hwmon/ From 272aa3966b3244e576c5c07bfff77ea320b89317 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Fri, 17 Apr 2009 10:56:51 -0300 Subject: [PATCH 32/40] V4L/DVB (11568): cx18: Fix the handling of i2c bus registration error * Return actual error values as returned by the i2c subsystem, rather than 0 or 1. * If the registration of the second bus fails, unregister the first one before exiting, otherwise we are leaking resources. Signed-off-by: Jean Delvare Acked-by: Andy Walls Signed-off-by: Mauro Carvalho Chehab --- drivers/media/video/cx18/cx18-i2c.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/drivers/media/video/cx18/cx18-i2c.c b/drivers/media/video/cx18/cx18-i2c.c index b9b7064a2be8..8591e4fc359f 100644 --- a/drivers/media/video/cx18/cx18-i2c.c +++ b/drivers/media/video/cx18/cx18-i2c.c @@ -211,7 +211,7 @@ static struct i2c_algo_bit_data cx18_i2c_algo_template = { /* init + register i2c algo-bit adapter */ int init_cx18_i2c(struct cx18 *cx) { - int i; + int i, err; CX18_DEBUG_I2C("i2c init\n"); for (i = 0; i < 2; i++) { @@ -268,8 +268,18 @@ int init_cx18_i2c(struct cx18 *cx) cx18_call_hw(cx, CX18_HW_GPIO_RESET_CTRL, core, reset, (u32) CX18_GPIO_RESET_I2C); - return i2c_bit_add_bus(&cx->i2c_adap[0]) || - i2c_bit_add_bus(&cx->i2c_adap[1]); + err = i2c_bit_add_bus(&cx->i2c_adap[0]); + if (err) + goto err; + err = i2c_bit_add_bus(&cx->i2c_adap[1]); + if (err) + goto err_del_bus_0; + return 0; + + err_del_bus_0: + i2c_del_adapter(&cx->i2c_adap[0]); + err: + return err; } void exit_cx18_i2c(struct cx18 *cx) From 9d63cec16829ac3600c5014d207a659df3b93c9a Mon Sep 17 00:00:00 2001 From: Dean Anderson Date: Mon, 20 Apr 2009 19:07:44 -0300 Subject: [PATCH 33/40] V4L/DVB (11570): patch: s2255drv: fix race condition on set mode set_modeready flag must be set before command sent to USB in s2255_write_config. Signed-off-by: Dean Anderson Signed-off-by: Mauro Carvalho Chehab --- drivers/media/video/s2255drv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/video/s2255drv.c b/drivers/media/video/s2255drv.c index 5202cadb2aae..30f4698be90a 100644 --- a/drivers/media/video/s2255drv.c +++ b/drivers/media/video/s2255drv.c @@ -1237,6 +1237,7 @@ static int s2255_set_mode(struct s2255_dev *dev, unsigned long chn, buffer[1] = (u32) chn_rev; buffer[2] = CMD_SET_MODE; memcpy(&buffer[3], &dev->mode[chn], sizeof(struct s2255_mode)); + dev->setmode_ready[chn] = 0; res = s2255_write_config(dev->udev, (unsigned char *)buffer, 512); if (debug) dump_verify_mode(dev, mode); @@ -1245,7 +1246,6 @@ static int s2255_set_mode(struct s2255_dev *dev, unsigned long chn, /* wait at least 3 frames before continuing */ if (mode->restart) { - dev->setmode_ready[chn] = 0; wait_event_timeout(dev->wait_setmode[chn], (dev->setmode_ready[chn] != 0), msecs_to_jiffies(S2255_SETMODE_TIMEOUT)); From e39c9047975f4302354c16c33eecfe59946d3fc4 Mon Sep 17 00:00:00 2001 From: Sascha Hauer Date: Fri, 24 Apr 2009 12:58:24 -0300 Subject: [PATCH 34/40] V4L/DVB (11612): mx3_camera: Fix compilation with CONFIG_PM Signed-off-by: Sascha Hauer Signed-off-by: Guennadi Liakhovetski Signed-off-by: Mauro Carvalho Chehab --- drivers/media/video/mx3_camera.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/media/video/mx3_camera.c b/drivers/media/video/mx3_camera.c index c462b811e994..2d0781118eb0 100644 --- a/drivers/media/video/mx3_camera.c +++ b/drivers/media/video/mx3_camera.c @@ -1063,10 +1063,6 @@ static struct soc_camera_host_ops mx3_soc_camera_host_ops = { .owner = THIS_MODULE, .add = mx3_camera_add_device, .remove = mx3_camera_remove_device, -#ifdef CONFIG_PM - .suspend = mx3_camera_suspend, - .resume = mx3_camera_resume, -#endif .set_crop = mx3_camera_set_crop, .set_fmt = mx3_camera_set_fmt, .try_fmt = mx3_camera_try_fmt, From d4dc673da9a94716ca2410306c1b36b5faf6c4cc Mon Sep 17 00:00:00 2001 From: Christopher Pascoe Date: Mon, 27 Apr 2009 11:27:04 -0300 Subject: [PATCH 35/40] V4L/DVB (11626): cx23885: Two fixes for DViCO FusionHDTV DVB-T Dual Express Two fixes for DViCO FusionHDTV DVB-T Dual Express: * Reset correct tuner when reinitializing xc3028. * Disable the I2C gate control to avoid locking up the I2C bus. Tested-by: John Knops Reviewed-by: Steven Toth Signed-off-by: Christopher Pascoe Signed-off-by: Devin Heitmueller Signed-off-by: Mauro Carvalho Chehab --- drivers/media/video/cx23885/cx23885-cards.c | 4 ++-- drivers/media/video/cx23885/cx23885-dvb.c | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/media/video/cx23885/cx23885-cards.c b/drivers/media/video/cx23885/cx23885-cards.c index a3c0565be1a9..6d6293f7d428 100644 --- a/drivers/media/video/cx23885/cx23885-cards.c +++ b/drivers/media/video/cx23885/cx23885-cards.c @@ -441,9 +441,9 @@ int cx23885_tuner_callback(void *priv, int component, int command, int arg) case CX23885_BOARD_DVICO_FUSIONHDTV_DVB_T_DUAL_EXP: /* Two identical tuners on two different i2c buses, * we need to reset the correct gpio. */ - if (port->nr == 0) + if (port->nr == 1) bitmask = 0x01; - else if (port->nr == 1) + else if (port->nr == 2) bitmask = 0x04; break; } diff --git a/drivers/media/video/cx23885/cx23885-dvb.c b/drivers/media/video/cx23885/cx23885-dvb.c index f48454ab3900..0c49a98213c4 100644 --- a/drivers/media/video/cx23885/cx23885-dvb.c +++ b/drivers/media/video/cx23885/cx23885-dvb.c @@ -314,6 +314,7 @@ static struct zl10353_config dvico_fusionhdtv_xc3028 = { .demod_address = 0x0f, .if2 = 45600, .no_tuner = 1, + .disable_i2c_gate_ctrl = 1, }; static struct stv0900_config netup_stv0900_config = { From fe78a49c8ae009d33d6e2c80d4f7f2634440d523 Mon Sep 17 00:00:00 2001 From: Devin Heitmueller Date: Tue, 28 Apr 2009 13:14:07 -0300 Subject: [PATCH 36/40] V4L/DVB (11652): au0828: fix kernel oops regression on USB disconnect. A regression was introduced in hg changeset 33810c734a0d, which resulted in a kernel panic whenever the device was disconnected from USB. The call to 4l2_device_register() was overwriting the pointer for usb_set_intfdata(), so when au0828_usb_disconnect() was called, the usb_get_intfdata() returned a pointer to the v4l2_device instead of the au0828_dev structure. Signed-off-by: Devin Heitmueller Signed-off-by: Mauro Carvalho Chehab --- drivers/media/video/au0828/au0828-core.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/media/video/au0828/au0828-core.c b/drivers/media/video/au0828/au0828-core.c index 4cee0b92eeee..a1e4c0d769a6 100644 --- a/drivers/media/video/au0828/au0828-core.c +++ b/drivers/media/video/au0828/au0828-core.c @@ -192,8 +192,6 @@ static int au0828_usb_probe(struct usb_interface *interface, dev->usbdev = usbdev; dev->boardnr = id->driver_info; - usb_set_intfdata(interface, dev); - /* Create the v4l2_device */ retval = v4l2_device_register(&interface->dev, &dev->v4l2_dev); if (retval) { @@ -222,6 +220,10 @@ static int au0828_usb_probe(struct usb_interface *interface, /* Digital TV */ au0828_dvb_register(dev); + /* Store the pointer to the au0828_dev so it can be accessed in + au0828_usb_disconnect */ + usb_set_intfdata(interface, dev); + printk(KERN_INFO "Registered device AU0828 [%s]\n", dev->board.name == NULL ? "Unset" : dev->board.name); From 091438dd5668396328a3419abcbc6591159eb8d1 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 29 Apr 2009 21:48:16 -0700 Subject: [PATCH 37/40] Linux 2.6.30-rc4 --- Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 9e5dc8f0ef47..eb38c8399261 100644 --- a/Makefile +++ b/Makefile @@ -1,8 +1,8 @@ VERSION = 2 PATCHLEVEL = 6 SUBLEVEL = 30 -EXTRAVERSION = -rc3 -NAME = Temporary Tasmanian Devil +EXTRAVERSION = -rc4 +NAME = Vindictive Armadillo # *DOCUMENTATION* # To see a list of typical targets execute "make help" From 30b4ae8a4498543863501f707879b7220b649602 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 4 Apr 2009 21:01:01 +0000 Subject: [PATCH 38/40] signals: split do_tkill Split out the code from do_tkill to make it reusable by the follow up patch which implements sys_rt_tgsigqueueinfo Signed-off-by: Thomas Gleixner Reviewed-by: Oleg Nesterov --- kernel/signal.c | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/kernel/signal.c b/kernel/signal.c index d8034737db4c..56d27acad87e 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -2278,24 +2278,17 @@ SYSCALL_DEFINE2(kill, pid_t, pid, int, sig) return kill_something_info(sig, &info, pid); } -static int do_tkill(pid_t tgid, pid_t pid, int sig) +static int +do_send_specific(pid_t tgid, pid_t pid, int sig, struct siginfo *info) { - int error; - struct siginfo info; struct task_struct *p; unsigned long flags; - - error = -ESRCH; - info.si_signo = sig; - info.si_errno = 0; - info.si_code = SI_TKILL; - info.si_pid = task_tgid_vnr(current); - info.si_uid = current_uid(); + int error = -ESRCH; rcu_read_lock(); p = find_task_by_vpid(pid); if (p && (tgid <= 0 || task_tgid_vnr(p) == tgid)) { - error = check_kill_permission(sig, &info, p); + error = check_kill_permission(sig, info, p); /* * The null signal is a permissions and process existence * probe. No signal is actually delivered. @@ -2305,7 +2298,7 @@ static int do_tkill(pid_t tgid, pid_t pid, int sig) * signal is private anyway. */ if (!error && sig && lock_task_sighand(p, &flags)) { - error = specific_send_sig_info(sig, &info, p); + error = specific_send_sig_info(sig, info, p); unlock_task_sighand(p, &flags); } } @@ -2314,6 +2307,19 @@ static int do_tkill(pid_t tgid, pid_t pid, int sig) return error; } +static int do_tkill(pid_t tgid, pid_t pid, int sig) +{ + struct siginfo info; + + info.si_signo = sig; + info.si_errno = 0; + info.si_code = SI_TKILL; + info.si_pid = task_tgid_vnr(current); + info.si_uid = current_uid(); + + return do_send_specific(tgid, pid, sig, &info); +} + /** * sys_tgkill - send signal to one specific thread * @tgid: the thread group ID of the thread From 62ab4505e3efaf67784f84059e0fb9cedb1728ea Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 4 Apr 2009 21:01:06 +0000 Subject: [PATCH 39/40] signals: implement sys_rt_tgsigqueueinfo sys_kill has the per thread counterpart sys_tgkill. sigqueueinfo is missing a thread directed counterpart. Such an interface is important for migrating applications from other OSes which have the per thread delivery implemented. Signed-off-by: Thomas Gleixner Reviewed-by: Oleg Nesterov Acked-by: Roland McGrath Acked-by: Ulrich Drepper --- include/linux/compat.h | 2 ++ include/linux/signal.h | 2 ++ kernel/compat.c | 11 +++++++++++ kernel/signal.c | 26 ++++++++++++++++++++++++++ 4 files changed, 41 insertions(+) diff --git a/include/linux/compat.h b/include/linux/compat.h index f2ded21f9a3c..af931ee43dd8 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -222,6 +222,8 @@ int copy_siginfo_from_user32(siginfo_t *to, struct compat_siginfo __user *from); int copy_siginfo_to_user32(struct compat_siginfo __user *to, siginfo_t *from); int get_compat_sigevent(struct sigevent *event, const struct compat_sigevent __user *u_event); +long compat_sys_rt_tgsigqueueinfo(compat_pid_t tgid, compat_pid_t pid, int sig, + struct compat_siginfo __user *uinfo); static inline int compat_timeval_compare(struct compat_timeval *lhs, struct compat_timeval *rhs) diff --git a/include/linux/signal.h b/include/linux/signal.h index 84f997f8aa53..c7552836bd95 100644 --- a/include/linux/signal.h +++ b/include/linux/signal.h @@ -235,6 +235,8 @@ static inline int valid_signal(unsigned long sig) extern int next_signal(struct sigpending *pending, sigset_t *mask); extern int group_send_sig_info(int sig, struct siginfo *info, struct task_struct *p); extern int __group_send_sig_info(int, struct siginfo *, struct task_struct *); +extern long do_rt_tgsigqueueinfo(pid_t tgid, pid_t pid, int sig, + siginfo_t *info); extern long do_sigpending(void __user *, unsigned long); extern int sigprocmask(int, sigset_t *, sigset_t *); extern int show_unhandled_signals; diff --git a/kernel/compat.c b/kernel/compat.c index 42d56544460f..f6c204f07ea6 100644 --- a/kernel/compat.c +++ b/kernel/compat.c @@ -882,6 +882,17 @@ compat_sys_rt_sigtimedwait (compat_sigset_t __user *uthese, } +asmlinkage long +compat_sys_rt_tgsigqueueinfo(compat_pid_t tgid, compat_pid_t pid, int sig, + struct compat_siginfo __user *uinfo) +{ + siginfo_t info; + + if (copy_siginfo_from_user32(&info, uinfo)) + return -EFAULT; + return do_rt_tgsigqueueinfo(tgid, pid, sig, &info); +} + #ifdef __ARCH_WANT_COMPAT_SYS_TIME /* compat_time_t is a 32 bit "long" and needs to get converted. */ diff --git a/kernel/signal.c b/kernel/signal.c index 56d27acad87e..f79b3b9f8375 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -2369,6 +2369,32 @@ SYSCALL_DEFINE3(rt_sigqueueinfo, pid_t, pid, int, sig, return kill_proc_info(sig, &info, pid); } +long do_rt_tgsigqueueinfo(pid_t tgid, pid_t pid, int sig, siginfo_t *info) +{ + /* This is only valid for single tasks */ + if (pid <= 0 || tgid <= 0) + return -EINVAL; + + /* Not even root can pretend to send signals from the kernel. + Nor can they impersonate a kill(), which adds source info. */ + if (info->si_code >= 0) + return -EPERM; + info->si_signo = sig; + + return do_send_specific(tgid, pid, sig, info); +} + +SYSCALL_DEFINE4(rt_tgsigqueueinfo, pid_t, tgid, pid_t, pid, int, sig, + siginfo_t __user *, uinfo) +{ + siginfo_t info; + + if (copy_from_user(&info, uinfo, sizeof(siginfo_t))) + return -EFAULT; + + return do_rt_tgsigqueueinfo(tgid, pid, sig, &info); +} + int do_sigaction(int sig, struct k_sigaction *act, struct k_sigaction *oact) { struct task_struct *t = current; From 12d161147f828192b5bcc33166f468a827832767 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 4 Apr 2009 21:01:10 +0000 Subject: [PATCH 40/40] x86: hookup sys_rt_tgsigqueueinfo Make the new sys_rt_tgsigqueueinfo available for x86. Signed-off-by: Thomas Gleixner --- arch/x86/ia32/ia32entry.S | 1 + arch/x86/include/asm/unistd_32.h | 1 + arch/x86/include/asm/unistd_64.h | 2 ++ arch/x86/kernel/syscall_table_32.S | 1 + 4 files changed, 5 insertions(+) diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S index a505202086e8..dcef387ddc36 100644 --- a/arch/x86/ia32/ia32entry.S +++ b/arch/x86/ia32/ia32entry.S @@ -830,4 +830,5 @@ ia32_sys_call_table: .quad sys_inotify_init1 .quad compat_sys_preadv .quad compat_sys_pwritev + .quad compat_sys_rt_tgsigqueueinfo /* 335 */ ia32_syscall_end: diff --git a/arch/x86/include/asm/unistd_32.h b/arch/x86/include/asm/unistd_32.h index 6e72d74cf8dc..708dae61262d 100644 --- a/arch/x86/include/asm/unistd_32.h +++ b/arch/x86/include/asm/unistd_32.h @@ -340,6 +340,7 @@ #define __NR_inotify_init1 332 #define __NR_preadv 333 #define __NR_pwritev 334 +#define __NR_rt_tgsigqueueinfo 335 #ifdef __KERNEL__ diff --git a/arch/x86/include/asm/unistd_64.h b/arch/x86/include/asm/unistd_64.h index f81829462325..4e2b05404400 100644 --- a/arch/x86/include/asm/unistd_64.h +++ b/arch/x86/include/asm/unistd_64.h @@ -657,6 +657,8 @@ __SYSCALL(__NR_inotify_init1, sys_inotify_init1) __SYSCALL(__NR_preadv, sys_preadv) #define __NR_pwritev 296 __SYSCALL(__NR_pwritev, sys_pwritev) +#define __NR_rt_tgsigqueueinfo 297 +__SYSCALL(__NR_rt_tgsigqueueinfo, sys_rt_tgsigqueueinfo) #ifndef __NO_STUBS diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S index ff5c8736b491..734f92c02dde 100644 --- a/arch/x86/kernel/syscall_table_32.S +++ b/arch/x86/kernel/syscall_table_32.S @@ -334,3 +334,4 @@ ENTRY(sys_call_table) .long sys_inotify_init1 .long sys_preadv .long sys_pwritev + .long sys_rt_tgsigqueueinfo /* 335 */