From 907a42617970a159361f17ef9a63f04d276995ab Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 10 Oct 2005 18:19:19 -0700 Subject: [PATCH 01/59] Linux v2.6.14-rc4 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index fdb96bc85080..504ba3ceb296 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ VERSION = 2 PATCHLEVEL = 6 SUBLEVEL = 14 -EXTRAVERSION =-rc3 +EXTRAVERSION =-rc4 NAME=Affluent Albatross # *DOCUMENTATION* From b8df110fea555d5088bba67f446c2435104405be Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 10 Oct 2005 20:43:22 -0700 Subject: [PATCH 02/59] [SPARC64]: Fix oops on runlevel change with serial console. Incorrect uart_write_wakeup() calls cause reference to a NULL tty pointer in sunsab and sunzilog serial drivers. Signed-off-by: David S. Miller --- drivers/serial/sunsab.c | 1 - drivers/serial/sunzilog.c | 5 ++--- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/serial/sunsab.c b/drivers/serial/sunsab.c index e971156daa60..ba9381fd3f2d 100644 --- a/drivers/serial/sunsab.c +++ b/drivers/serial/sunsab.c @@ -274,7 +274,6 @@ static void transmit_chars(struct uart_sunsab_port *up, if (uart_circ_empty(xmit) || uart_tx_stopped(&up->port)) { up->interrupt_mask1 |= SAB82532_IMR1_XPR; writeb(up->interrupt_mask1, &up->regs->w.imr1); - uart_write_wakeup(&up->port); return; } diff --git a/drivers/serial/sunzilog.c b/drivers/serial/sunzilog.c index d75445738c88..7653d6cf05af 100644 --- a/drivers/serial/sunzilog.c +++ b/drivers/serial/sunzilog.c @@ -517,10 +517,9 @@ static void sunzilog_transmit_chars(struct uart_sunzilog_port *up, if (up->port.info == NULL) goto ack_tx_int; xmit = &up->port.info->xmit; - if (uart_circ_empty(xmit)) { - uart_write_wakeup(&up->port); + if (uart_circ_empty(xmit)) goto ack_tx_int; - } + if (uart_tx_stopped(&up->port)) goto ack_tx_int; From 85d9b05d9b1edad9a2630584754720a957ab0a2a Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Mon, 10 Oct 2005 20:47:42 -0700 Subject: [PATCH 03/59] [NETFILTER] PPTP helper: Add missing Kconfig dependency PPTP should not be selectable without conntrack enabled Signed-off-by: Harald Welte Signed-off-by: David S. Miller --- net/ipv4/netfilter/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index a7659728e7a0..4b6f80775fb0 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -139,6 +139,7 @@ config IP_NF_AMANDA config IP_NF_PPTP tristate 'PPTP protocol support' + depends on IP_NF_CONNTRACK help This module adds support for PPTP (Point to Point Tunnelling Protocol, RFC2637) connection tracking and NAT. From f40863cec87464f3f4ec3a6c00e3fda3bbb0c91b Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Mon, 10 Oct 2005 20:51:53 -0700 Subject: [PATCH 04/59] [NETFILTER] ipt_ULOG: Mark ipt_ULOG as OBSOLETE Similar to nfnetlink_queue and ip_queue, we mark ipt_ULOG as obsolete. This should have been part of the original nfnetlink_log merge, but I somehow missed it. Signed-off-by: Harald Welte Signed-off-by: David S. Miller --- net/ipv4/netfilter/Kconfig | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index 4b6f80775fb0..7d917e4ce1d9 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -499,9 +499,14 @@ config IP_NF_TARGET_LOG To compile it as a module, choose M here. If unsure, say N. config IP_NF_TARGET_ULOG - tristate "ULOG target support" + tristate "ULOG target support (OBSOLETE)" depends on IP_NF_IPTABLES ---help--- + + This option enables the old IPv4-only "ipt_ULOG" implementation + which has been obsoleted by the new "nfnetlink_log" code (see + CONFIG_NETFILTER_NETLINK_LOG). + This option adds a `ULOG' target, which allows you to create rules in any iptables table. The packet is passed to a userspace logging daemon using netlink multicast sockets; unlike the LOG target From ebe0bbf06c9e03613bdcb6b5a704595a9344b7ff Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Mon, 10 Oct 2005 20:52:19 -0700 Subject: [PATCH 05/59] [NETFILTER] nfnetlink: use highest bit of nfa_type to indicate nested TLV As Henrik Nordstrom pointed out, all our efforts with "split endian" (i.e. host byte order tags, net byte order values) are useless, unless a parser can determine whether an attribute is nested or not. This patch steals the highest bit of nfattr.nfa_type to indicate whether the data payload contains a nested nfattr (1) or not (0). This will break userspace compatibility, but luckily no kernel with nfnetlink was released so far. Signed-off-by: Harald Welte Signed-off-by: David S. Miller --- include/linux/netfilter/nfnetlink.h | 12 ++++++++---- net/netfilter/nfnetlink.c | 4 ++-- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/include/linux/netfilter/nfnetlink.h b/include/linux/netfilter/nfnetlink.h index 1d5b10ae2399..f08e870100f4 100644 --- a/include/linux/netfilter/nfnetlink.h +++ b/include/linux/netfilter/nfnetlink.h @@ -41,11 +41,15 @@ enum nfnetlink_groups { struct nfattr { u_int16_t nfa_len; - u_int16_t nfa_type; + u_int16_t nfa_type; /* we use 15 bits for the type, and the highest + * bit to indicate whether the payload is nested */ } __attribute__ ((packed)); -/* FIXME: Shamelessly copy and pasted from rtnetlink.h, it's time - * to put this in a generic file */ +/* FIXME: Apart from NFNL_NFA_NESTED shamelessly copy and pasted from + * rtnetlink.h, it's time to put this in a generic file */ + +#define NFNL_NFA_NEST 0x8000 +#define NFA_TYPE(attr) ((attr)->nfa_type & 0x7fff) #define NFA_ALIGNTO 4 #define NFA_ALIGN(len) (((len) + NFA_ALIGNTO - 1) & ~(NFA_ALIGNTO - 1)) @@ -59,7 +63,7 @@ struct nfattr #define NFA_PAYLOAD(nfa) ((int)((nfa)->nfa_len) - NFA_LENGTH(0)) #define NFA_NEST(skb, type) \ ({ struct nfattr *__start = (struct nfattr *) (skb)->tail; \ - NFA_PUT(skb, type, 0, NULL); \ + NFA_PUT(skb, (NFNL_NFA_NEST | type), 0, NULL); \ __start; }) #define NFA_NEST_END(skb, start) \ ({ (start)->nfa_len = ((skb)->tail - (unsigned char *) (start)); \ diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index 1caaca06f698..4bc27a6334c1 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -133,7 +133,7 @@ int nfattr_parse(struct nfattr *tb[], int maxattr, struct nfattr *nfa, int len) memset(tb, 0, sizeof(struct nfattr *) * maxattr); while (NFA_OK(nfa, len)) { - unsigned flavor = nfa->nfa_type; + unsigned flavor = NFA_TYPE(nfa); if (flavor && flavor <= maxattr) tb[flavor-1] = nfa; nfa = NFA_NEXT(nfa, len); @@ -177,7 +177,7 @@ nfnetlink_check_attributes(struct nfnetlink_subsystem *subsys, int attrlen = nlh->nlmsg_len - NLMSG_ALIGN(min_len); while (NFA_OK(attr, attrlen)) { - unsigned flavor = attr->nfa_type; + unsigned flavor = NFA_TYPE(attr); if (flavor) { if (flavor > attr_count) return -EINVAL; From b3a91d037a2575040f9b6a483f60c407a3d80368 Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Mon, 10 Oct 2005 20:52:36 -0700 Subject: [PATCH 06/59] [NETFILTER] nat: remove bogus structure member When 'rustynat' was merged in 2.6.12, the use of the "helper" pointer of struct ipt_nat_info was obsoleted, but the pointer not removed from the struct. This patch removes the pointer, thereby yet again shrinking struct ip_conntrack. Discovered-by: Rusty Russell Signed-off-by: Harald Welte Signed-off-by: David S. Miller --- include/linux/netfilter_ipv4/ip_nat.h | 4 ---- 1 file changed, 4 deletions(-) diff --git a/include/linux/netfilter_ipv4/ip_nat.h b/include/linux/netfilter_ipv4/ip_nat.h index e201ec6e9905..41a107de17cf 100644 --- a/include/linux/netfilter_ipv4/ip_nat.h +++ b/include/linux/netfilter_ipv4/ip_nat.h @@ -58,10 +58,6 @@ extern rwlock_t ip_nat_lock; struct ip_nat_info { struct list_head bysource; - - /* Helper (NULL if none). */ - struct ip_nat_helper *helper; - struct ip_nat_seq seq[IP_CT_DIR_MAX]; }; From d000eaf7720cb12cd03cd3d55f71be44357d27a9 Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Mon, 10 Oct 2005 20:52:51 -0700 Subject: [PATCH 07/59] [NETFILTER] conntrack_netlink: Fix endian issue with status from userspace When we send "status" from userspace, we forget to convert the endianness. This patch adds the reqired conversion. Thanks to Pablo Neira for discovering this. Signed-off-by: Harald Welte Signed-off-by: David S. Miller --- net/ipv4/netfilter/ip_conntrack_netlink.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv4/netfilter/ip_conntrack_netlink.c b/net/ipv4/netfilter/ip_conntrack_netlink.c index b08a432efcf8..eade2749915a 100644 --- a/net/ipv4/netfilter/ip_conntrack_netlink.c +++ b/net/ipv4/netfilter/ip_conntrack_netlink.c @@ -833,7 +833,8 @@ out: static inline int ctnetlink_change_status(struct ip_conntrack *ct, struct nfattr *cda[]) { - unsigned long d, status = *(u_int32_t *)NFA_DATA(cda[CTA_STATUS-1]); + unsigned long d; + unsigned status = ntohl(*(u_int32_t *)NFA_DATA(cda[CTA_STATUS-1])); d = ct->status ^ status; if (d & (IPS_EXPECTED|IPS_CONFIRMED|IPS_DYING)) From a1bcc3f26885b0a8bf04799551de2e9574ccbda1 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 10 Oct 2005 20:53:16 -0700 Subject: [PATCH 08/59] [NETFILTER] ctnetlink: ICMP ID is not mandatory The ID is only required by ICMP type 8 (echo), so it's not mandatory for all sort of ICMP connections. This patch makes mandatory only the type and the code for ICMP netlink messages. Signed-off-by: Pablo Neira Ayuso Signed-off-by: Harald Welte Signed-off-by: David S. Miller --- net/ipv4/netfilter/ip_conntrack_proto_icmp.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/ipv4/netfilter/ip_conntrack_proto_icmp.c b/net/ipv4/netfilter/ip_conntrack_proto_icmp.c index 838d1d69b36e..98f0015dd255 100644 --- a/net/ipv4/netfilter/ip_conntrack_proto_icmp.c +++ b/net/ipv4/netfilter/ip_conntrack_proto_icmp.c @@ -296,8 +296,7 @@ static int icmp_nfattr_to_tuple(struct nfattr *tb[], struct ip_conntrack_tuple *tuple) { if (!tb[CTA_PROTO_ICMP_TYPE-1] - || !tb[CTA_PROTO_ICMP_CODE-1] - || !tb[CTA_PROTO_ICMP_ID-1]) + || !tb[CTA_PROTO_ICMP_CODE-1]) return -1; tuple->dst.u.icmp.type = From 5bbc243aafff9ad653dc7a9fa7bcaf0b4631355a Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Mon, 10 Oct 2005 20:54:01 -0700 Subject: [PATCH 09/59] [NETFILTER]: Add missing include to ip_conntrack_tuple.h Without this #include, __be16 is not defined and userspace programs will break. Signed-off-by: Harald Welte Signed-off-by: David S. Miller --- include/linux/netfilter_ipv4/ip_conntrack_tuple.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/linux/netfilter_ipv4/ip_conntrack_tuple.h b/include/linux/netfilter_ipv4/ip_conntrack_tuple.h index 20e43f018b7c..3232db11a4e5 100644 --- a/include/linux/netfilter_ipv4/ip_conntrack_tuple.h +++ b/include/linux/netfilter_ipv4/ip_conntrack_tuple.h @@ -1,6 +1,8 @@ #ifndef _IP_CONNTRACK_TUPLE_H #define _IP_CONNTRACK_TUPLE_H +#include + /* A `tuple' is a structure containing the information to uniquely identify a connection. ie. if two packets have the same tuple, they are in the same connection; if not, they are not. From e1c73b78e3706bd3c336d4730a01dd4081dfb7ee Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 10 Oct 2005 20:55:49 -0700 Subject: [PATCH 10/59] [NETFILTER] ctnetlink: add one nesting level for TCP state To keep consistency, the TCP private protocol information is nested attributes under CTA_PROTOINFO_TCP. This way the sequence of attributes to access the TCP state information looks like here below: CTA_PROTOINFO CTA_PROTOINFO_TCP CTA_PROTOINFO_TCP_STATE instead of: CTA_PROTOINFO CTA_PROTOINFO_TCP_STATE Signed-off-by: Pablo Neira Ayuso Signed-off-by: Harald Welte Signed-off-by: David S. Miller --- include/linux/netfilter/nfnetlink_conntrack.h | 9 ++++++++- net/ipv4/netfilter/ip_conntrack_proto_tcp.c | 4 ++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/include/linux/netfilter/nfnetlink_conntrack.h b/include/linux/netfilter/nfnetlink_conntrack.h index 5c55751c78e4..fb5511030185 100644 --- a/include/linux/netfilter/nfnetlink_conntrack.h +++ b/include/linux/netfilter/nfnetlink_conntrack.h @@ -70,11 +70,18 @@ enum ctattr_l4proto { enum ctattr_protoinfo { CTA_PROTOINFO_UNSPEC, - CTA_PROTOINFO_TCP_STATE, + CTA_PROTOINFO_TCP, __CTA_PROTOINFO_MAX }; #define CTA_PROTOINFO_MAX (__CTA_PROTOINFO_MAX - 1) +enum ctattr_protoinfo_tcp { + CTA_PROTOINFO_TCP_UNSPEC, + CTA_PROTOINFO_TCP_STATE, + __CTA_PROTOINFO_TCP_MAX +}; +#define CTA_PROTOINFO_TCP_MAX (__CTA_PROTOINFO_TCP_MAX - 1) + enum ctattr_counters { CTA_COUNTERS_UNSPEC, CTA_COUNTERS_PACKETS, diff --git a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c index 121760d6cc50..75e27e65c28f 100644 --- a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c +++ b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c @@ -341,11 +341,15 @@ static int tcp_print_conntrack(struct seq_file *s, static int tcp_to_nfattr(struct sk_buff *skb, struct nfattr *nfa, const struct ip_conntrack *ct) { + struct nfattr *nest_parms = NFA_NEST(skb, CTA_PROTOINFO_TCP); + read_lock_bh(&tcp_lock); NFA_PUT(skb, CTA_PROTOINFO_TCP_STATE, sizeof(u_int8_t), &ct->proto.tcp.state); read_unlock_bh(&tcp_lock); + NFA_NEST_END(skb, nest_parms); + return 0; nfattr_failure: From 08eb8f124f990aa476589d1f7810f7ec7f259c08 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 10 Oct 2005 21:02:26 -0700 Subject: [PATCH 11/59] [SPARC32]: Revert IOMAP change eb98129eec7fa605f0407dfd92d40ee8ddf5cd9a Breakage noted by Al Viro. It breaks non-PCI builds, it's probably better to have a more direct implementation on sparc32, and which driver actually needs this is still questionable. We can resolve this in 2.6.15 Signed-off-by: David S. Miller --- arch/sparc/Kconfig | 4 ---- arch/sparc/defconfig | 1 - 2 files changed, 5 deletions(-) diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index f7c51b869049..6537445dac0e 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -21,10 +21,6 @@ config GENERIC_ISA_DMA bool default y -config GENERIC_IOMAP - bool - default y - source "init/Kconfig" menu "General machine setup" diff --git a/arch/sparc/defconfig b/arch/sparc/defconfig index 8a3aef1e22f5..a69856263009 100644 --- a/arch/sparc/defconfig +++ b/arch/sparc/defconfig @@ -5,7 +5,6 @@ CONFIG_MMU=y CONFIG_UID16=y CONFIG_HIGHMEM=y CONFIG_GENERIC_ISA_DMA=y -CONFIG_GENERIC_IOMAP=y # # Code maturity level options From a02a64223eddb410712b015fb3342c9a316ab70b Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 10 Oct 2005 21:11:08 -0700 Subject: [PATCH 12/59] [IPSEC]: Use ALIGN macro in ESP This patch uses the macro ALIGN in all the applicable spots for ESP. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/ipv4/esp4.c | 11 ++++++----- net/ipv6/esp6.c | 11 ++++++----- 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index 1b5a09d1b90b..e911c6dd8296 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -42,10 +43,10 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb) esp = x->data; alen = esp->auth.icv_trunc_len; tfm = esp->conf.tfm; - blksize = (crypto_tfm_alg_blocksize(tfm) + 3) & ~3; - clen = (clen + 2 + blksize-1)&~(blksize-1); + blksize = ALIGN(crypto_tfm_alg_blocksize(tfm), 4); + clen = ALIGN(clen + 2, blksize); if (esp->conf.padlen) - clen = (clen + esp->conf.padlen-1)&~(esp->conf.padlen-1); + clen = ALIGN(clen, esp->conf.padlen); if ((nfrags = skb_cow_data(skb, clen-skb->len+alen, &trailer)) < 0) goto error; @@ -307,13 +308,13 @@ static u32 esp4_get_max_size(struct xfrm_state *x, int mtu) u32 blksize = crypto_tfm_alg_blocksize(esp->conf.tfm); if (x->props.mode) { - mtu = (mtu + 2 + blksize-1)&~(blksize-1); + mtu = ALIGN(mtu + 2, blksize); } else { /* The worst case. */ mtu += 2 + blksize; } if (esp->conf.padlen) - mtu = (mtu + esp->conf.padlen-1)&~(esp->conf.padlen-1); + mtu = ALIGN(mtu, esp->conf.padlen); return mtu + x->props.header_len + esp->auth.icv_trunc_len; } diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index 9b27460f0cc7..0c7d7b42c80b 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include @@ -66,10 +67,10 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb) alen = esp->auth.icv_trunc_len; tfm = esp->conf.tfm; - blksize = (crypto_tfm_alg_blocksize(tfm) + 3) & ~3; - clen = (clen + 2 + blksize-1)&~(blksize-1); + blksize = ALIGN(crypto_tfm_alg_blocksize(tfm), 4); + clen = ALIGN(clen + 2, blksize); if (esp->conf.padlen) - clen = (clen + esp->conf.padlen-1)&~(esp->conf.padlen-1); + clen = ALIGN(clen, esp->conf.padlen); if ((nfrags = skb_cow_data(skb, clen-skb->len+alen, &trailer)) < 0) { goto error; @@ -238,13 +239,13 @@ static u32 esp6_get_max_size(struct xfrm_state *x, int mtu) u32 blksize = crypto_tfm_alg_blocksize(esp->conf.tfm); if (x->props.mode) { - mtu = (mtu + 2 + blksize-1)&~(blksize-1); + mtu = ALIGN(mtu + 2, blksize); } else { /* The worst case. */ mtu += 2 + blksize; } if (esp->conf.padlen) - mtu = (mtu + esp->conf.padlen-1)&~(esp->conf.padlen-1); + mtu = ALIGN(mtu, esp->conf.padlen); return mtu + x->props.header_len + esp->auth.icv_full_len; } From d4875b049b2e6401a6e1fae90b7f09e20a636fcf Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 10 Oct 2005 21:11:34 -0700 Subject: [PATCH 13/59] [IPSEC] Fix block size/MTU bugs in ESP This patch fixes the following bugs in ESP: * Fix transport mode MTU overestimate. This means that the inner MTU is smaller than it needs be. Worse yet, given an input MTU which is a multiple of 4 it will always produce an estimate which is not a multiple of 4. For example, given a standard ESP/3DES/MD5 transform and an MTU of 1500, the resulting MTU for transport mode is 1462 when it should be 1464. The reason for this is because IP header lengths are always a multiple of 4 for IPv4 and 8 for IPv6. * Ensure that the block size is at least 4. This is required by RFC2406 and corresponds to what the esp_output function does. At the moment this only affects crypto_null as its block size is 1. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/ipv4/esp4.c | 6 +++--- net/ipv6/esp6.c | 7 ++++--- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index e911c6dd8296..1b18ce66e7b7 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -144,7 +144,7 @@ static int esp_input(struct xfrm_state *x, struct xfrm_decap_state *decap, struc struct ip_esp_hdr *esph; struct esp_data *esp = x->data; struct sk_buff *trailer; - int blksize = crypto_tfm_alg_blocksize(esp->conf.tfm); + int blksize = ALIGN(crypto_tfm_alg_blocksize(esp->conf.tfm), 4); int alen = esp->auth.icv_trunc_len; int elen = skb->len - sizeof(struct ip_esp_hdr) - esp->conf.ivlen - alen; int nfrags; @@ -305,13 +305,13 @@ static int esp_post_input(struct xfrm_state *x, struct xfrm_decap_state *decap, static u32 esp4_get_max_size(struct xfrm_state *x, int mtu) { struct esp_data *esp = x->data; - u32 blksize = crypto_tfm_alg_blocksize(esp->conf.tfm); + u32 blksize = ALIGN(crypto_tfm_alg_blocksize(esp->conf.tfm), 4); if (x->props.mode) { mtu = ALIGN(mtu + 2, blksize); } else { /* The worst case. */ - mtu += 2 + blksize; + mtu = ALIGN(mtu + 2, 4) + blksize - 4; } if (esp->conf.padlen) mtu = ALIGN(mtu, esp->conf.padlen); diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index 0c7d7b42c80b..40d9a1935ab5 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -134,7 +134,7 @@ static int esp6_input(struct xfrm_state *x, struct xfrm_decap_state *decap, stru struct ipv6_esp_hdr *esph; struct esp_data *esp = x->data; struct sk_buff *trailer; - int blksize = crypto_tfm_alg_blocksize(esp->conf.tfm); + int blksize = ALIGN(crypto_tfm_alg_blocksize(esp->conf.tfm), 4); int alen = esp->auth.icv_trunc_len; int elen = skb->len - sizeof(struct ipv6_esp_hdr) - esp->conf.ivlen - alen; @@ -236,13 +236,14 @@ out_nofree: static u32 esp6_get_max_size(struct xfrm_state *x, int mtu) { struct esp_data *esp = x->data; - u32 blksize = crypto_tfm_alg_blocksize(esp->conf.tfm); + u32 blksize = ALIGN(crypto_tfm_alg_blocksize(esp->conf.tfm), 4); if (x->props.mode) { mtu = ALIGN(mtu + 2, blksize); } else { /* The worst case. */ - mtu += 2 + blksize; + u32 padsize = ((blksize - 1) & 7) + 1; + mtu = ALIGN(mtu + 2, padsize) + blksize - padsize; } if (esp->conf.padlen) mtu = ALIGN(mtu, esp->conf.padlen); From a051a8f7306476af0a74370ad56e793cb6c43bf7 Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Mon, 10 Oct 2005 21:21:10 -0700 Subject: [PATCH 14/59] [NETFILTER]: Use only 32bit counters for CONNTRACK_ACCT Initially we used 64bit counters for conntrack-based accounting, since we had no event mechanism to tell userspace that our counters are about to overflow. With nfnetlink_conntrack, we now have such a event mechanism and thus can save 16bytes per connection. Signed-off-by: Harald Welte Signed-off-by: David S. Miller --- include/linux/netfilter/nfnetlink_conntrack.h | 6 ++++-- include/linux/netfilter_ipv4/ip_conntrack.h | 8 ++++++-- net/ipv4/netfilter/ip_conntrack_core.c | 13 ++++++++----- net/ipv4/netfilter/ip_conntrack_netlink.c | 8 ++++---- 4 files changed, 22 insertions(+), 13 deletions(-) diff --git a/include/linux/netfilter/nfnetlink_conntrack.h b/include/linux/netfilter/nfnetlink_conntrack.h index fb5511030185..116fcaced909 100644 --- a/include/linux/netfilter/nfnetlink_conntrack.h +++ b/include/linux/netfilter/nfnetlink_conntrack.h @@ -84,8 +84,10 @@ enum ctattr_protoinfo_tcp { enum ctattr_counters { CTA_COUNTERS_UNSPEC, - CTA_COUNTERS_PACKETS, - CTA_COUNTERS_BYTES, + CTA_COUNTERS_PACKETS, /* old 64bit counters */ + CTA_COUNTERS_BYTES, /* old 64bit counters */ + CTA_COUNTERS32_PACKETS, + CTA_COUNTERS32_BYTES, __CTA_COUNTERS_MAX }; #define CTA_COUNTERS_MAX (__CTA_COUNTERS_MAX - 1) diff --git a/include/linux/netfilter_ipv4/ip_conntrack.h b/include/linux/netfilter_ipv4/ip_conntrack.h index 4ced38736813..d078bb91d9e5 100644 --- a/include/linux/netfilter_ipv4/ip_conntrack.h +++ b/include/linux/netfilter_ipv4/ip_conntrack.h @@ -117,6 +117,10 @@ enum ip_conntrack_events /* NAT info */ IPCT_NATINFO_BIT = 10, IPCT_NATINFO = (1 << IPCT_NATINFO_BIT), + + /* Counter highest bit has been set */ + IPCT_COUNTER_FILLING_BIT = 11, + IPCT_COUNTER_FILLING = (1 << IPCT_COUNTER_FILLING_BIT), }; enum ip_conntrack_expect_events { @@ -192,8 +196,8 @@ do { \ struct ip_conntrack_counter { - u_int64_t packets; - u_int64_t bytes; + u_int32_t packets; + u_int32_t bytes; }; struct ip_conntrack_helper; diff --git a/net/ipv4/netfilter/ip_conntrack_core.c b/net/ipv4/netfilter/ip_conntrack_core.c index ea65dd3e517a..07a80b56e8dc 100644 --- a/net/ipv4/netfilter/ip_conntrack_core.c +++ b/net/ipv4/netfilter/ip_conntrack_core.c @@ -1119,7 +1119,7 @@ void __ip_ct_refresh_acct(struct ip_conntrack *ct, unsigned long extra_jiffies, int do_acct) { - int do_event = 0; + int event = 0; IP_NF_ASSERT(ct->timeout.data == (unsigned long)ct); IP_NF_ASSERT(skb); @@ -1129,13 +1129,13 @@ void __ip_ct_refresh_acct(struct ip_conntrack *ct, /* If not in hash table, timer will not be active yet */ if (!is_confirmed(ct)) { ct->timeout.expires = extra_jiffies; - do_event = 1; + event = IPCT_REFRESH; } else { /* Need del_timer for race avoidance (may already be dying). */ if (del_timer(&ct->timeout)) { ct->timeout.expires = jiffies + extra_jiffies; add_timer(&ct->timeout); - do_event = 1; + event = IPCT_REFRESH; } } @@ -1144,14 +1144,17 @@ void __ip_ct_refresh_acct(struct ip_conntrack *ct, ct->counters[CTINFO2DIR(ctinfo)].packets++; ct->counters[CTINFO2DIR(ctinfo)].bytes += ntohs(skb->nh.iph->tot_len); + if ((ct->counters[CTINFO2DIR(ctinfo)].packets & 0x80000000) + || (ct->counters[CTINFO2DIR(ctinfo)].bytes & 0x80000000)) + event |= IPCT_COUNTER_FILLING; } #endif write_unlock_bh(&ip_conntrack_lock); /* must be unlocked when calling event cache */ - if (do_event) - ip_conntrack_event_cache(IPCT_REFRESH, skb); + if (event) + ip_conntrack_event_cache(event, skb); } #if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \ diff --git a/net/ipv4/netfilter/ip_conntrack_netlink.c b/net/ipv4/netfilter/ip_conntrack_netlink.c index eade2749915a..06ed91ee8ace 100644 --- a/net/ipv4/netfilter/ip_conntrack_netlink.c +++ b/net/ipv4/netfilter/ip_conntrack_netlink.c @@ -177,11 +177,11 @@ ctnetlink_dump_counters(struct sk_buff *skb, const struct ip_conntrack *ct, struct nfattr *nest_count = NFA_NEST(skb, type); u_int64_t tmp; - tmp = cpu_to_be64(ct->counters[dir].packets); - NFA_PUT(skb, CTA_COUNTERS_PACKETS, sizeof(u_int64_t), &tmp); + tmp = htonl(ct->counters[dir].packets); + NFA_PUT(skb, CTA_COUNTERS32_PACKETS, sizeof(u_int32_t), &tmp); - tmp = cpu_to_be64(ct->counters[dir].bytes); - NFA_PUT(skb, CTA_COUNTERS_BYTES, sizeof(u_int64_t), &tmp); + tmp = htonl(ct->counters[dir].bytes); + NFA_PUT(skb, CTA_COUNTERS32_BYTES, sizeof(u_int32_t), &tmp); NFA_NEST_END(skb, nest_count); From 339231537506846cb232a2f0cc4a2c662b2d5b07 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 10 Oct 2005 21:23:28 -0700 Subject: [PATCH 15/59] [NETFILTER] ctnetlink: allow userspace to change TCP state This patch adds the ability of changing the state a TCP connection. I know that this must be used with care but it's required to provide a complete conntrack creation via conntrack_netlink. So I'll document this aspect on the upcoming docs. Signed-off-by: Pablo Neira Ayuso Signed-off-by: Harald Welte Signed-off-by: David S. Miller --- .../netfilter_ipv4/ip_conntrack_protocol.h | 3 +++ net/ipv4/netfilter/ip_conntrack_proto_tcp.c | 23 +++++++++++++++++++ 2 files changed, 26 insertions(+) diff --git a/include/linux/netfilter_ipv4/ip_conntrack_protocol.h b/include/linux/netfilter_ipv4/ip_conntrack_protocol.h index b6b99be8632a..2c76b879e3dc 100644 --- a/include/linux/netfilter_ipv4/ip_conntrack_protocol.h +++ b/include/linux/netfilter_ipv4/ip_conntrack_protocol.h @@ -52,6 +52,9 @@ struct ip_conntrack_protocol int (*to_nfattr)(struct sk_buff *skb, struct nfattr *nfa, const struct ip_conntrack *ct); + /* convert nfnetlink attributes to protoinfo */ + int (*from_nfattr)(struct nfattr *tb[], struct ip_conntrack *ct); + int (*tuple_to_nfattr)(struct sk_buff *skb, const struct ip_conntrack_tuple *t); int (*nfattr_to_tuple)(struct nfattr *tb[], diff --git a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c index 75e27e65c28f..d6701cafbcc2 100644 --- a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c +++ b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c @@ -356,6 +356,28 @@ nfattr_failure: read_unlock_bh(&tcp_lock); return -1; } + +static int nfattr_to_tcp(struct nfattr *cda[], struct ip_conntrack *ct) +{ + struct nfattr *attr = cda[CTA_PROTOINFO_TCP-1]; + struct nfattr *tb[CTA_PROTOINFO_TCP_MAX]; + + if (nfattr_parse_nested(tb, CTA_PROTOINFO_TCP_MAX, attr) < 0) + goto nfattr_failure; + + if (!tb[CTA_PROTOINFO_TCP_STATE-1]) + return -EINVAL; + + write_lock_bh(&tcp_lock); + ct->proto.tcp.state = + *(u_int8_t *)NFA_DATA(tb[CTA_PROTOINFO_TCP_STATE-1]); + write_unlock_bh(&tcp_lock); + + return 0; + +nfattr_failure: + return -1; +} #endif static unsigned int get_conntrack_index(const struct tcphdr *tcph) @@ -1127,6 +1149,7 @@ struct ip_conntrack_protocol ip_conntrack_protocol_tcp = #if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \ defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE) .to_nfattr = tcp_to_nfattr, + .from_nfattr = nfattr_to_tcp, .tuple_to_nfattr = ip_ct_port_tuple_to_nfattr, .nfattr_to_tuple = ip_ct_port_nfattr_to_tuple, #endif From 061cb4a0ec34a6e3069d5a1b3c547e55a71498c5 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 10 Oct 2005 21:23:46 -0700 Subject: [PATCH 16/59] [NETFILTER] ctnetlink: add support to change protocol info This patch add support to change the state of the private protocol information via conntrack_netlink. Signed-off-by: Pablo Neira Ayuso Signed-off-by: Harald Welte Signed-off-by: David S. Miller --- net/ipv4/netfilter/ip_conntrack_netlink.c | 37 +++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/net/ipv4/netfilter/ip_conntrack_netlink.c b/net/ipv4/netfilter/ip_conntrack_netlink.c index 06ed91ee8ace..166e6069f121 100644 --- a/net/ipv4/netfilter/ip_conntrack_netlink.c +++ b/net/ipv4/netfilter/ip_conntrack_netlink.c @@ -949,6 +949,31 @@ ctnetlink_change_timeout(struct ip_conntrack *ct, struct nfattr *cda[]) return 0; } +static inline int +ctnetlink_change_protoinfo(struct ip_conntrack *ct, struct nfattr *cda[]) +{ + struct nfattr *tb[CTA_PROTOINFO_MAX], *attr = cda[CTA_PROTOINFO-1]; + struct ip_conntrack_protocol *proto; + u_int16_t npt = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum; + int err = 0; + + if (nfattr_parse_nested(tb, CTA_PROTOINFO_MAX, attr) < 0) + goto nfattr_failure; + + proto = ip_conntrack_proto_find_get(npt); + if (!proto) + return -EINVAL; + + if (proto->from_nfattr) + err = proto->from_nfattr(tb, ct); + ip_conntrack_proto_put(proto); + + return err; + +nfattr_failure: + return -ENOMEM; +} + static int ctnetlink_change_conntrack(struct ip_conntrack *ct, struct nfattr *cda[]) { @@ -974,6 +999,12 @@ ctnetlink_change_conntrack(struct ip_conntrack *ct, struct nfattr *cda[]) return err; } + if (cda[CTA_PROTOINFO-1]) { + err = ctnetlink_change_protoinfo(ct, cda); + if (err < 0) + return err; + } + DEBUGP("all done\n"); return 0; } @@ -1003,6 +1034,12 @@ ctnetlink_create_conntrack(struct nfattr *cda[], if (err < 0) goto err; + if (cda[CTA_PROTOINFO-1]) { + err = ctnetlink_change_protoinfo(ct, cda); + if (err < 0) + return err; + } + ct->helper = ip_conntrack_helper_find_get(rtuple); add_timer(&ct->timeout); From 777b25a2fea7129222eb11fba55c0a67982383ff Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 10 Oct 2005 21:24:20 -0700 Subject: [PATCH 17/59] [CCID]: Check if ccid is NULL in the hc_[tr]x_exit functions For consistency with ccid_exit and to fix a bug when IP_DCCP_UNLOAD_HACK is enabled as the control sock is not associated to any CCID. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- net/dccp/ccid.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/dccp/ccid.h b/net/dccp/ccid.h index 21e55142dcd3..c37eeeaf5c6e 100644 --- a/net/dccp/ccid.h +++ b/net/dccp/ccid.h @@ -110,14 +110,14 @@ static inline int ccid_hc_tx_init(struct ccid *ccid, struct sock *sk) static inline void ccid_hc_rx_exit(struct ccid *ccid, struct sock *sk) { - if (ccid->ccid_hc_rx_exit != NULL && + if (ccid != NULL && ccid->ccid_hc_rx_exit != NULL && dccp_sk(sk)->dccps_hc_rx_ccid_private != NULL) ccid->ccid_hc_rx_exit(sk); } static inline void ccid_hc_tx_exit(struct ccid *ccid, struct sock *sk) { - if (ccid->ccid_hc_tx_exit != NULL && + if (ccid != NULL && ccid->ccid_hc_tx_exit != NULL && dccp_sk(sk)->dccps_hc_tx_ccid_private != NULL) ccid->ccid_hc_tx_exit(sk); } From 2a9bc9bb4d3a4570a8a48aadf071b91e657adb89 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 10 Oct 2005 21:25:00 -0700 Subject: [PATCH 18/59] [DCCP]: Transition from PARTOPEN to OPEN when receiving DATA packets Noticed by Andrea Bittau, that provided a patch that was modified to not transition from RESPOND to OPEN when receiving DATA packets. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- net/dccp/input.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/net/dccp/input.c b/net/dccp/input.c index 1b6b2cb12376..3454d5941900 100644 --- a/net/dccp/input.c +++ b/net/dccp/input.c @@ -375,6 +375,9 @@ static int dccp_rcv_respond_partopen_state_process(struct sock *sk, case DCCP_PKT_RESET: inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK); break; + case DCCP_PKT_DATA: + if (sk->sk_state == DCCP_RESPOND) + break; case DCCP_PKT_DATAACK: case DCCP_PKT_ACK: /* @@ -393,7 +396,8 @@ static int dccp_rcv_respond_partopen_state_process(struct sock *sk, dccp_sk(sk)->dccps_osr = DCCP_SKB_CB(skb)->dccpd_seq; dccp_set_state(sk, DCCP_OPEN); - if (dh->dccph_type == DCCP_PKT_DATAACK) { + if (dh->dccph_type == DCCP_PKT_DATAACK || + dh->dccph_type == DCCP_PKT_DATA) { dccp_rcv_established(sk, skb, dh, len); queued = 1; /* packet was queued (by dccp_rcv_established) */ From eeb2b8560676e454ad37ee30b49bc7d897edc9be Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 10 Oct 2005 21:25:23 -0700 Subject: [PATCH 19/59] [TWSK]: Grab the module refcount for timewait sockets This is required to avoid unloading a module that has active timewait sockets, such as DCCP. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- include/net/inet_timewait_sock.h | 3 +++ net/ipv4/inet_timewait_sock.c | 1 + 2 files changed, 4 insertions(+) diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h index 4ade56ef3a4d..28f7b2103505 100644 --- a/include/net/inet_timewait_sock.h +++ b/include/net/inet_timewait_sock.h @@ -19,6 +19,7 @@ #include #include +#include #include #include #include @@ -193,11 +194,13 @@ static inline u32 inet_rcv_saddr(const struct sock *sk) static inline void inet_twsk_put(struct inet_timewait_sock *tw) { if (atomic_dec_and_test(&tw->tw_refcnt)) { + struct module *owner = tw->tw_prot->owner; #ifdef SOCK_REFCNT_DEBUG printk(KERN_DEBUG "%s timewait_sock %p released\n", tw->tw_prot->name, tw); #endif kmem_cache_free(tw->tw_prot->twsk_slab, tw); + module_put(owner); } } diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index f9076ef3a1a8..a010e9a68811 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c @@ -111,6 +111,7 @@ struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, const int stat tw->tw_prot = sk->sk_prot_creator; atomic_set(&tw->tw_refcnt, 1); inet_twsk_dead_node_init(tw); + __module_get(tw->tw_prot->owner); } return tw; From 875521ddccfa90d519cf31dfc8aa472f7f6325bb Mon Sep 17 00:00:00 2001 From: Jeff Garzik Date: Tue, 11 Oct 2005 01:38:35 -0400 Subject: [PATCH 20/59] e100: revert CPU cycle saver microcode, it causes severe problems for certain NICs Reverting 685fac63f5ca6c5ca06bab641e1a32bbf9287e89: > [PATCH] e100: CPU cycle saver microcode > > > Add cpu cycle saver microcode to 8086:{1209/1229} other than ICH devices. > > Signed-off-by: Mallikarjuna R Chilakala > Signed-off-by: Ganesh Venkatesan > Signed-off-by: John Ronciak > Signed-off-by: Jeff Garzik --- drivers/net/e100.c | 224 ++++----------------------------------------- 1 file changed, 18 insertions(+), 206 deletions(-) diff --git a/drivers/net/e100.c b/drivers/net/e100.c index fbf1c06ec5c1..40887f09b681 100644 --- a/drivers/net/e100.c +++ b/drivers/net/e100.c @@ -903,8 +903,8 @@ static void mdio_write(struct net_device *netdev, int addr, int reg, int data) static void e100_get_defaults(struct nic *nic) { - struct param_range rfds = { .min = 16, .max = 256, .count = 256 }; - struct param_range cbs = { .min = 64, .max = 256, .count = 128 }; + struct param_range rfds = { .min = 16, .max = 256, .count = 64 }; + struct param_range cbs = { .min = 64, .max = 256, .count = 64 }; pci_read_config_byte(nic->pdev, PCI_REVISION_ID, &nic->rev_id); /* MAC type is encoded as rev ID; exception: ICH is treated as 82559 */ @@ -1007,213 +1007,25 @@ static void e100_configure(struct nic *nic, struct cb *cb, struct sk_buff *skb) c[16], c[17], c[18], c[19], c[20], c[21], c[22], c[23]); } -/********************************************************/ -/* Micro code for 8086:1229 Rev 8 */ -/********************************************************/ - -/* Parameter values for the D101M B-step */ -#define D101M_CPUSAVER_TIMER_DWORD 78 -#define D101M_CPUSAVER_BUNDLE_DWORD 65 -#define D101M_CPUSAVER_MIN_SIZE_DWORD 126 - -#define D101M_B_RCVBUNDLE_UCODE \ -{\ -0x00550215, 0xFFFF0437, 0xFFFFFFFF, 0x06A70789, 0xFFFFFFFF, 0x0558FFFF, \ -0x000C0001, 0x00101312, 0x000C0008, 0x00380216, \ -0x0010009C, 0x00204056, 0x002380CC, 0x00380056, \ -0x0010009C, 0x00244C0B, 0x00000800, 0x00124818, \ -0x00380438, 0x00000000, 0x00140000, 0x00380555, \ -0x00308000, 0x00100662, 0x00100561, 0x000E0408, \ -0x00134861, 0x000C0002, 0x00103093, 0x00308000, \ -0x00100624, 0x00100561, 0x000E0408, 0x00100861, \ -0x000C007E, 0x00222C21, 0x000C0002, 0x00103093, \ -0x00380C7A, 0x00080000, 0x00103090, 0x00380C7A, \ -0x00000000, 0x00000000, 0x00000000, 0x00000000, \ -0x0010009C, 0x00244C2D, 0x00010004, 0x00041000, \ -0x003A0437, 0x00044010, 0x0038078A, 0x00000000, \ -0x00100099, 0x00206C7A, 0x0010009C, 0x00244C48, \ -0x00130824, 0x000C0001, 0x00101213, 0x00260C75, \ -0x00041000, 0x00010004, 0x00130826, 0x000C0006, \ -0x002206A8, 0x0013C926, 0x00101313, 0x003806A8, \ -0x00000000, 0x00000000, 0x00000000, 0x00000000, \ -0x00000000, 0x00000000, 0x00000000, 0x00000000, \ -0x00080600, 0x00101B10, 0x00050004, 0x00100826, \ -0x00101210, 0x00380C34, 0x00000000, 0x00000000, \ -0x0021155B, 0x00100099, 0x00206559, 0x0010009C, \ -0x00244559, 0x00130836, 0x000C0000, 0x00220C62, \ -0x000C0001, 0x00101B13, 0x00229C0E, 0x00210C0E, \ -0x00226C0E, 0x00216C0E, 0x0022FC0E, 0x00215C0E, \ -0x00214C0E, 0x00380555, 0x00010004, 0x00041000, \ -0x00278C67, 0x00040800, 0x00018100, 0x003A0437, \ -0x00130826, 0x000C0001, 0x00220559, 0x00101313, \ -0x00380559, 0x00000000, 0x00000000, 0x00000000, \ -0x00000000, 0x00000000, 0x00000000, 0x00000000, \ -0x00000000, 0x00130831, 0x0010090B, 0x00124813, \ -0x000CFF80, 0x002606AB, 0x00041000, 0x00010004, \ -0x003806A8, 0x00000000, 0x00000000, 0x00000000, \ -} - -/********************************************************/ -/* Micro code for 8086:1229 Rev 9 */ -/********************************************************/ - -/* Parameter values for the D101S */ -#define D101S_CPUSAVER_TIMER_DWORD 78 -#define D101S_CPUSAVER_BUNDLE_DWORD 67 -#define D101S_CPUSAVER_MIN_SIZE_DWORD 128 - -#define D101S_RCVBUNDLE_UCODE \ -{\ -0x00550242, 0xFFFF047E, 0xFFFFFFFF, 0x06FF0818, 0xFFFFFFFF, 0x05A6FFFF, \ -0x000C0001, 0x00101312, 0x000C0008, 0x00380243, \ -0x0010009C, 0x00204056, 0x002380D0, 0x00380056, \ -0x0010009C, 0x00244F8B, 0x00000800, 0x00124818, \ -0x0038047F, 0x00000000, 0x00140000, 0x003805A3, \ -0x00308000, 0x00100610, 0x00100561, 0x000E0408, \ -0x00134861, 0x000C0002, 0x00103093, 0x00308000, \ -0x00100624, 0x00100561, 0x000E0408, 0x00100861, \ -0x000C007E, 0x00222FA1, 0x000C0002, 0x00103093, \ -0x00380F90, 0x00080000, 0x00103090, 0x00380F90, \ -0x00000000, 0x00000000, 0x00000000, 0x00000000, \ -0x0010009C, 0x00244FAD, 0x00010004, 0x00041000, \ -0x003A047E, 0x00044010, 0x00380819, 0x00000000, \ -0x00100099, 0x00206FFD, 0x0010009A, 0x0020AFFD, \ -0x0010009C, 0x00244FC8, 0x00130824, 0x000C0001, \ -0x00101213, 0x00260FF7, 0x00041000, 0x00010004, \ -0x00130826, 0x000C0006, 0x00220700, 0x0013C926, \ -0x00101313, 0x00380700, 0x00000000, 0x00000000, \ -0x00000000, 0x00000000, 0x00000000, 0x00000000, \ -0x00080600, 0x00101B10, 0x00050004, 0x00100826, \ -0x00101210, 0x00380FB6, 0x00000000, 0x00000000, \ -0x002115A9, 0x00100099, 0x002065A7, 0x0010009A, \ -0x0020A5A7, 0x0010009C, 0x002445A7, 0x00130836, \ -0x000C0000, 0x00220FE4, 0x000C0001, 0x00101B13, \ -0x00229F8E, 0x00210F8E, 0x00226F8E, 0x00216F8E, \ -0x0022FF8E, 0x00215F8E, 0x00214F8E, 0x003805A3, \ -0x00010004, 0x00041000, 0x00278FE9, 0x00040800, \ -0x00018100, 0x003A047E, 0x00130826, 0x000C0001, \ -0x002205A7, 0x00101313, 0x003805A7, 0x00000000, \ -0x00000000, 0x00000000, 0x00000000, 0x00000000, \ -0x00000000, 0x00000000, 0x00000000, 0x00130831, \ -0x0010090B, 0x00124813, 0x000CFF80, 0x00260703, \ -0x00041000, 0x00010004, 0x00380700 \ -} - -/********************************************************/ -/* Micro code for the 8086:1229 Rev F/10 */ -/********************************************************/ - -/* Parameter values for the D102 E-step */ -#define D102_E_CPUSAVER_TIMER_DWORD 42 -#define D102_E_CPUSAVER_BUNDLE_DWORD 54 -#define D102_E_CPUSAVER_MIN_SIZE_DWORD 46 - -#define D102_E_RCVBUNDLE_UCODE \ -{\ -0x007D028F, 0x0E4204F9, 0x14ED0C85, 0x14FA14E9, 0x0EF70E36, 0x1FFF1FFF, \ -0x00E014B9, 0x00000000, 0x00000000, 0x00000000, \ -0x00E014BD, 0x00000000, 0x00000000, 0x00000000, \ -0x00E014D5, 0x00000000, 0x00000000, 0x00000000, \ -0x00000000, 0x00000000, 0x00000000, 0x00000000, \ -0x00E014C1, 0x00000000, 0x00000000, 0x00000000, \ -0x00000000, 0x00000000, 0x00000000, 0x00000000, \ -0x00000000, 0x00000000, 0x00000000, 0x00000000, \ -0x00000000, 0x00000000, 0x00000000, 0x00000000, \ -0x00E014C8, 0x00000000, 0x00000000, 0x00000000, \ -0x00200600, 0x00E014EE, 0x00000000, 0x00000000, \ -0x0030FF80, 0x00940E46, 0x00038200, 0x00102000, \ -0x00E00E43, 0x00000000, 0x00000000, 0x00000000, \ -0x00300006, 0x00E014FB, 0x00000000, 0x00000000, \ -0x00000000, 0x00000000, 0x00000000, 0x00000000, \ -0x00000000, 0x00000000, 0x00000000, 0x00000000, \ -0x00000000, 0x00000000, 0x00000000, 0x00000000, \ -0x00906E41, 0x00800E3C, 0x00E00E39, 0x00000000, \ -0x00906EFD, 0x00900EFD, 0x00E00EF8, 0x00000000, \ -0x00000000, 0x00000000, 0x00000000, 0x00000000, \ -0x00000000, 0x00000000, 0x00000000, 0x00000000, \ -0x00000000, 0x00000000, 0x00000000, 0x00000000, \ -0x00000000, 0x00000000, 0x00000000, 0x00000000, \ -0x00000000, 0x00000000, 0x00000000, 0x00000000, \ -0x00000000, 0x00000000, 0x00000000, 0x00000000, \ -0x00000000, 0x00000000, 0x00000000, 0x00000000, \ -0x00000000, 0x00000000, 0x00000000, 0x00000000, \ -0x00000000, 0x00000000, 0x00000000, 0x00000000, \ -0x00000000, 0x00000000, 0x00000000, 0x00000000, \ -0x00000000, 0x00000000, 0x00000000, 0x00000000, \ -0x00000000, 0x00000000, 0x00000000, 0x00000000, \ -0x00000000, 0x00000000, 0x00000000, 0x00000000, \ -0x00000000, 0x00000000, 0x00000000, 0x00000000, \ -} - static void e100_load_ucode(struct nic *nic, struct cb *cb, struct sk_buff *skb) { -/* *INDENT-OFF* */ - static struct { - u32 ucode[UCODE_SIZE + 1]; - u8 mac; - u8 timer_dword; - u8 bundle_dword; - u8 min_size_dword; - } ucode_opts[] = { - { D101M_B_RCVBUNDLE_UCODE, - mac_82559_D101M, - D101M_CPUSAVER_TIMER_DWORD, - D101M_CPUSAVER_BUNDLE_DWORD, - D101M_CPUSAVER_MIN_SIZE_DWORD }, - { D101S_RCVBUNDLE_UCODE, - mac_82559_D101S, - D101S_CPUSAVER_TIMER_DWORD, - D101S_CPUSAVER_BUNDLE_DWORD, - D101S_CPUSAVER_MIN_SIZE_DWORD }, - { D102_E_RCVBUNDLE_UCODE, - mac_82551_F, - D102_E_CPUSAVER_TIMER_DWORD, - D102_E_CPUSAVER_BUNDLE_DWORD, - D102_E_CPUSAVER_MIN_SIZE_DWORD }, - { D102_E_RCVBUNDLE_UCODE, - mac_82551_10, - D102_E_CPUSAVER_TIMER_DWORD, - D102_E_CPUSAVER_BUNDLE_DWORD, - D102_E_CPUSAVER_MIN_SIZE_DWORD }, - { {0}, 0, 0, 0, 0} - }, *opts; -/* *INDENT-ON* */ + int i; + static const u32 ucode[UCODE_SIZE] = { + /* NFS packets are misinterpreted as TCO packets and + * incorrectly routed to the BMC over SMBus. This + * microcode patch checks the fragmented IP bit in the + * NFS/UDP header to distinguish between NFS and TCO. */ + 0x0EF70E36, 0x1FFF1FFF, 0x1FFF1FFF, 0x1FFF1FFF, 0x1FFF1FFF, + 0x1FFF1FFF, 0x00906E41, 0x00800E3C, 0x00E00E39, 0x00000000, + 0x00906EFD, 0x00900EFD, 0x00E00EF8, + }; -#define BUNDLESMALL 1 -#define BUNDLEMAX 50 -#define INTDELAY 15000 - - opts = ucode_opts; - - /* do not load u-code for ICH devices */ - if (nic->flags & ich) - return; - - /* Search for ucode match against h/w rev_id */ - while (opts->mac) { - if (nic->mac == opts->mac) { - int i; - u32 *ucode = opts->ucode; - - /* Insert user-tunable settings */ - ucode[opts->timer_dword] &= 0xFFFF0000; - ucode[opts->timer_dword] |= - (u16) INTDELAY; - ucode[opts->bundle_dword] &= 0xFFFF0000; - ucode[opts->bundle_dword] |= (u16) BUNDLEMAX; - ucode[opts->min_size_dword] &= 0xFFFF0000; - ucode[opts->min_size_dword] |= - (BUNDLESMALL) ? 0xFFFF : 0xFF80; - - for(i = 0; i < UCODE_SIZE; i++) - cb->u.ucode[i] = cpu_to_le32(ucode[i]); - cb->command = cpu_to_le16(cb_ucode); - return; - } - opts++; - } - - cb->command = cpu_to_le16(cb_nop); + if(nic->mac == mac_82551_F || nic->mac == mac_82551_10) { + for(i = 0; i < UCODE_SIZE; i++) + cb->u.ucode[i] = cpu_to_le32(ucode[i]); + cb->command = cpu_to_le16(cb_ucode); + } else + cb->command = cpu_to_le16(cb_nop); } static void e100_setup_iaaddr(struct nic *nic, struct cb *cb, From 9621904012de3c8d0d4e2904dcc7170b3012119e Mon Sep 17 00:00:00 2001 From: Jeff Garzik Date: Tue, 11 Oct 2005 01:52:39 -0400 Subject: [PATCH 21/59] sata_nv: Fixed bug introduced by 0.08's MCP51 and MCP55 support. --- drivers/scsi/sata_nv.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/scsi/sata_nv.c b/drivers/scsi/sata_nv.c index c05653c7779d..cb832b03ec5e 100644 --- a/drivers/scsi/sata_nv.c +++ b/drivers/scsi/sata_nv.c @@ -29,6 +29,8 @@ * NV-specific details such as register offsets, SATA phy location, * hotplug info, etc. * + * 0.09 + * - Fixed bug introduced by 0.08's MCP51 and MCP55 support. * * 0.08 * - Added support for MCP51 and MCP55. @@ -132,9 +134,7 @@ enum nv_host_type GENERIC, NFORCE2, NFORCE3, - CK804, - MCP51, - MCP55 + CK804 }; static struct pci_device_id nv_pci_tbl[] = { @@ -153,13 +153,13 @@ static struct pci_device_id nv_pci_tbl[] = { { PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE_MCP04_SATA2, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CK804 }, { PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE_MCP51_SATA, - PCI_ANY_ID, PCI_ANY_ID, 0, 0, MCP51 }, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, GENERIC }, { PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE_MCP51_SATA2, - PCI_ANY_ID, PCI_ANY_ID, 0, 0, MCP51 }, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, GENERIC }, { PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE_MCP55_SATA, - PCI_ANY_ID, PCI_ANY_ID, 0, 0, MCP55 }, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, GENERIC }, { PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE_MCP55_SATA2, - PCI_ANY_ID, PCI_ANY_ID, 0, 0, MCP55 }, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, GENERIC }, { PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_STORAGE_IDE<<8, 0xffff00, GENERIC }, From e5945b4f605d1479d5b44252a2c691168c5d38d6 Mon Sep 17 00:00:00 2001 From: Cornelia Huck Date: Tue, 11 Oct 2005 08:28:59 -0700 Subject: [PATCH 22/59] [PATCH] s390: ccw device reconnect oops. Search for a disconnect ccw_device on the ccw bus rather than on the css bus (was a typo in patch I did for the klist conversion). A cast to an embedding ccw_device from an embedded device in a struct subchannel will lead us to oopses. Signed-off-by: Cornelia Huck Signed-off-by: Martin Schwidefsky Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/s390/cio/device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/s390/cio/device.c b/drivers/s390/cio/device.c index 14c76f5e4177..9adc11e8b8bc 100644 --- a/drivers/s390/cio/device.c +++ b/drivers/s390/cio/device.c @@ -544,7 +544,7 @@ get_disc_ccwdev_by_devno(unsigned int devno, struct ccw_device *sibling) .sibling = sibling, }; - dev = bus_find_device(&css_bus_type, NULL, &data, match_devno); + dev = bus_find_device(&ccw_bus_type, NULL, &data, match_devno); return dev ? to_ccwdev(dev) : NULL; } From e4314bf496bb7bb9acd754aeb319c30869bc8d76 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Tue, 11 Oct 2005 08:29:00 -0700 Subject: [PATCH 23/59] [PATCH] ppc64: Fix PCI hotplug pSeries_irq_bus_setup is marked __devinit but references s7a_workaround which is marked __initdata. Depending on who got the memory for s7a_workaround (and if the value was now positive), it was possible for PCI hotplugged devices to have 3 subtracted from their interrupt number. This would happen randomly and caused me much confusion :) Signed-off-by: Anton Blanchard Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/ppc64/kernel/pSeries_pci.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/ppc64/kernel/pSeries_pci.c b/arch/ppc64/kernel/pSeries_pci.c index 1f5f141fb7a1..928f8febdb3b 100644 --- a/arch/ppc64/kernel/pSeries_pci.c +++ b/arch/ppc64/kernel/pSeries_pci.c @@ -32,7 +32,7 @@ #include "pci.h" -static int __initdata s7a_workaround = -1; +static int __devinitdata s7a_workaround = -1; #if 0 void pcibios_name_device(struct pci_dev *dev) @@ -60,7 +60,7 @@ void pcibios_name_device(struct pci_dev *dev) DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, pcibios_name_device); #endif -static void __init check_s7a(void) +static void __devinit check_s7a(void) { struct device_node *root; char *model; From ad6ce87e5bd4440a6ce9aa9f8cda795b9e902eff Mon Sep 17 00:00:00 2001 From: Abhay Salunke Date: Tue, 11 Oct 2005 08:29:02 -0700 Subject: [PATCH 24/59] [PATCH] dell_rbu: changes in packet update mechanism In the current dell_rbu code ver 2.0 the packet update mechanism makes the user app dump every individual packet in to the driver. This adds in efficiency as every packet update makes the /sys/class/firmware/dell_rbu/loading and data files to disappear and reappear again. Thus the user app needs to wait for the files to reappear to dump another packet. This slows down the packet update tremendously in case of large number of packets. I am submitting a new patch for dell_rbu which will change the way we do packet updates; In the new method the user app will create a new single file which has already packetized the rbu image and all the packets are now staged in this file. This driver also creates a new entry in /sys/devices/platform/dell_rbu/packet_size ; the user needs to echo the packet size here before downloading the packet file. The user should do the following: create one single file which has all the packets stacked together. echo the packet size in to /sys/devices/platform/dell_rbu/packet_size. echo 1 > /sys/class/firmware/dell_rbu/loading cat the packetfile > /sys/class/firmware/dell_rbu/data echo 0 > /sys/class/firmware/dell_rbu/loading The driver takes the file which came through /sys/class/firmware/dell_rbu/data and takes chunks of paket_size data from it and place in contiguous memory. This makes packet update process very efficient and fast. As all the packet update happens in one single operation. The user can still read back the downloaded file from /sys/devices/platform/dell_rbu/data. Signed-off-by: Abhay Salunke Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/dell_rbu.txt | 38 ++++++-- drivers/firmware/dell_rbu.c | 178 +++++++++++++++++++----------------- 2 files changed, 123 insertions(+), 93 deletions(-) diff --git a/Documentation/dell_rbu.txt b/Documentation/dell_rbu.txt index 95d7f62e4dbc..941343a7a265 100644 --- a/Documentation/dell_rbu.txt +++ b/Documentation/dell_rbu.txt @@ -35,6 +35,7 @@ The driver load creates the following directories under the /sys file system. /sys/class/firmware/dell_rbu/data /sys/devices/platform/dell_rbu/image_type /sys/devices/platform/dell_rbu/data +/sys/devices/platform/dell_rbu/packet_size The driver supports two types of update mechanism; monolithic and packetized. These update mechanism depends upon the BIOS currently running on the system. @@ -47,8 +48,26 @@ By default the driver uses monolithic memory for the update type. This can be changed to packets during the driver load time by specifying the load parameter image_type=packet. This can also be changed later as below echo packet > /sys/devices/platform/dell_rbu/image_type -Also echoing either mono ,packet or init in to image_type will free up the -memory allocated by the driver. + +In packet update mode the packet size has to be given before any packets can +be downloaded. It is done as below +echo XXXX > /sys/devices/platform/dell_rbu/packet_size +In the packet update mechanism, the user neesd to create a new file having +packets of data arranged back to back. It can be done as follows +The user creates packets header, gets the chunk of the BIOS image and +placs it next to the packetheader; now, the packetheader + BIOS image chunk +added to geather should match the specified packet_size. This makes one +packet, the user needs to create more such packets out of the entire BIOS +image file and then arrange all these packets back to back in to one single +file. +This file is then copied to /sys/class/firmware/dell_rbu/data. +Once this file gets to the driver, the driver extracts packet_size data from +the file and spreads it accross the physical memory in contiguous packet_sized +space. +This method makes sure that all the packets get to the driver in a single operation. + +In monolithic update the user simply get the BIOS image (.hdr file) and copies +to the data file as is without any change to the BIOS image itself. Do the steps below to download the BIOS image. 1) echo 1 > /sys/class/firmware/dell_rbu/loading @@ -58,7 +77,10 @@ Do the steps below to download the BIOS image. The /sys/class/firmware/dell_rbu/ entries will remain till the following is done. echo -1 > /sys/class/firmware/dell_rbu/loading. -Until this step is completed the drivr cannot be unloaded. +Until this step is completed the driver cannot be unloaded. +Also echoing either mono ,packet or init in to image_type will free up the +memory allocated by the driver. + If an user by accident executes steps 1 and 3 above without executing step 2; it will make the /sys/class/firmware/dell_rbu/ entries to disappear. The entries can be recreated by doing the following @@ -66,15 +88,11 @@ echo init > /sys/devices/platform/dell_rbu/image_type NOTE: echoing init in image_type does not change it original value. Also the driver provides /sys/devices/platform/dell_rbu/data readonly file to -read back the image downloaded. This is useful in case of packet update -mechanism where the above steps 1,2,3 will repeated for every packet. -By reading the /sys/devices/platform/dell_rbu/data file all packet data -downloaded can be verified in a single file. -The packets are arranged in this file one after the other in a FIFO order. +read back the image downloaded. NOTE: -This driver requires a patch for firmware_class.c which has the addition -of request_firmware_nowait_nohotplug function to wortk +This driver requires a patch for firmware_class.c which has the modified +request_firmware_nowait function. Also after updating the BIOS image an user mdoe application neeeds to execute code which message the BIOS update request to the BIOS. So on the next reboot the BIOS knows about the new image downloaded and it updates it self. diff --git a/drivers/firmware/dell_rbu.c b/drivers/firmware/dell_rbu.c index b66782398258..4f4ba9b6d182 100644 --- a/drivers/firmware/dell_rbu.c +++ b/drivers/firmware/dell_rbu.c @@ -50,7 +50,7 @@ MODULE_AUTHOR("Abhay Salunke "); MODULE_DESCRIPTION("Driver for updating BIOS image on DELL systems"); MODULE_LICENSE("GPL"); -MODULE_VERSION("2.0"); +MODULE_VERSION("3.0"); #define BIOS_SCAN_LIMIT 0xffffffff #define MAX_IMAGE_LENGTH 16 @@ -62,15 +62,16 @@ static struct _rbu_data { int dma_alloc; spinlock_t lock; unsigned long packet_read_count; - unsigned long packet_write_count; unsigned long num_packets; unsigned long packetsize; + unsigned long imagesize; int entry_created; } rbu_data; static char image_type[MAX_IMAGE_LENGTH + 1] = "mono"; module_param_string(image_type, image_type, sizeof (image_type), 0); -MODULE_PARM_DESC(image_type, "BIOS image type. choose- mono or packet"); +MODULE_PARM_DESC(image_type, + "BIOS image type. choose- mono or packet or init"); struct packet_data { struct list_head list; @@ -88,55 +89,13 @@ static dma_addr_t dell_rbu_dmaaddr; static void init_packet_head(void) { INIT_LIST_HEAD(&packet_data_head.list); - rbu_data.packet_write_count = 0; rbu_data.packet_read_count = 0; rbu_data.num_packets = 0; rbu_data.packetsize = 0; + rbu_data.imagesize = 0; } -static int fill_last_packet(void *data, size_t length) -{ - struct list_head *ptemp_list; - struct packet_data *packet = NULL; - int packet_count = 0; - - pr_debug("fill_last_packet: entry \n"); - - if (!rbu_data.num_packets) { - pr_debug("fill_last_packet: num_packets=0\n"); - return -ENOMEM; - } - - packet_count = rbu_data.num_packets; - - ptemp_list = (&packet_data_head.list)->prev; - - packet = list_entry(ptemp_list, struct packet_data, list); - - if ((rbu_data.packet_write_count + length) > rbu_data.packetsize) { - pr_debug("dell_rbu:%s: packet size data " - "overrun\n", __FUNCTION__); - return -EINVAL; - } - - pr_debug("fill_last_packet : buffer = %p\n", packet->data); - - memcpy((packet->data + rbu_data.packet_write_count), data, length); - - if ((rbu_data.packet_write_count + length) == rbu_data.packetsize) { - /* - * this was the last data chunk in the packet - * so reinitialize the packet data counter to zero - */ - rbu_data.packet_write_count = 0; - } else - rbu_data.packet_write_count += length; - - pr_debug("fill_last_packet: exit \n"); - return 0; -} - -static int create_packet(size_t length) +static int create_packet(void *data, size_t length) { struct packet_data *newpacket; int ordernum = 0; @@ -186,9 +145,11 @@ static int create_packet(size_t length) INIT_LIST_HEAD(&newpacket->list); list_add_tail(&newpacket->list, &packet_data_head.list); /* - * packets have fixed size + * packets may not have fixed size */ - newpacket->length = rbu_data.packetsize; + newpacket->length = length; + + memcpy(newpacket->data, data, length); pr_debug("create_packet: exit \n"); @@ -198,13 +159,37 @@ static int create_packet(size_t length) static int packetize_data(void *data, size_t length) { int rc = 0; - - if (!rbu_data.packet_write_count) { - if ((rc = create_packet(length))) - return rc; + int done = 0; + int packet_length; + u8 *temp; + u8 *end = (u8 *) data + length; + pr_debug("packetize_data: data length %d\n", length); + if (!rbu_data.packetsize) { + printk(KERN_WARNING + "dell_rbu: packetsize not specified\n"); + return -EIO; } - if ((rc = fill_last_packet(data, length))) - return rc; + + temp = (u8 *) data; + + /* packetize the hunk */ + while (!done) { + if ((temp + rbu_data.packetsize) < end) + packet_length = rbu_data.packetsize; + else { + /* this is the last packet */ + packet_length = end - temp; + done = 1; + } + + if ((rc = create_packet(temp, packet_length))) + return rc; + + pr_debug("%lu:%lu\n", temp, (end - temp)); + temp += packet_length; + } + + rbu_data.imagesize = length; return rc; } @@ -243,7 +228,7 @@ static int do_packet_read(char *data, struct list_head *ptemp_list, return bytes_copied; } -static int packet_read_list(char *data, size_t *pread_length) +static int packet_read_list(char *data, size_t * pread_length) { struct list_head *ptemp_list; int temp_count = 0; @@ -303,10 +288,9 @@ static void packet_empty_list(void) newpacket->ordernum); kfree(newpacket); } - rbu_data.packet_write_count = 0; rbu_data.packet_read_count = 0; rbu_data.num_packets = 0; - rbu_data.packetsize = 0; + rbu_data.imagesize = 0; } /* @@ -425,7 +409,6 @@ static ssize_t read_packet_data(char *buffer, loff_t pos, size_t count) size_t bytes_left; size_t data_length; char *ptempBuf = buffer; - unsigned long imagesize; /* check to see if we have something to return */ if (rbu_data.num_packets == 0) { @@ -434,22 +417,20 @@ static ssize_t read_packet_data(char *buffer, loff_t pos, size_t count) goto read_rbu_data_exit; } - imagesize = rbu_data.num_packets * rbu_data.packetsize; - - if (pos > imagesize) { + if (pos > rbu_data.imagesize) { retval = 0; printk(KERN_WARNING "dell_rbu:read_packet_data: " "data underrun\n"); goto read_rbu_data_exit; } - bytes_left = imagesize - pos; + bytes_left = rbu_data.imagesize - pos; data_length = min(bytes_left, count); if ((retval = packet_read_list(ptempBuf, &data_length)) < 0) goto read_rbu_data_exit; - if ((pos + count) > imagesize) { + if ((pos + count) > rbu_data.imagesize) { rbu_data.packet_read_count = 0; /* this was the last copy */ retval = bytes_left; @@ -499,7 +480,7 @@ static ssize_t read_rbu_mono_data(char *buffer, loff_t pos, size_t count) } static ssize_t read_rbu_data(struct kobject *kobj, char *buffer, - loff_t pos, size_t count) + loff_t pos, size_t count) { ssize_t ret_count = 0; @@ -531,13 +512,18 @@ static void callbackfn_rbu(const struct firmware *fw, void *context) memcpy(rbu_data.image_update_buffer, fw->data, fw->size); } else if (!strcmp(image_type, "packet")) { - if (!rbu_data.packetsize) - rbu_data.packetsize = fw->size; - else if (rbu_data.packetsize != fw->size) { + /* + * we need to free previous packets if a + * new hunk of packets needs to be downloaded + */ + packet_empty_list(); + if (packetize_data(fw->data, fw->size)) + /* Incase something goes wrong when we are + * in middle of packetizing the data, we + * need to free up whatever packets might + * have been created before we quit. + */ packet_empty_list(); - rbu_data.packetsize = fw->size; - } - packetize_data(fw->data, fw->size); } else pr_debug("invalid image type specified.\n"); spin_unlock(&rbu_data.lock); @@ -553,7 +539,7 @@ static void callbackfn_rbu(const struct firmware *fw, void *context) } static ssize_t read_rbu_image_type(struct kobject *kobj, char *buffer, - loff_t pos, size_t count) + loff_t pos, size_t count) { int size = 0; if (!pos) @@ -562,7 +548,7 @@ static ssize_t read_rbu_image_type(struct kobject *kobj, char *buffer, } static ssize_t write_rbu_image_type(struct kobject *kobj, char *buffer, - loff_t pos, size_t count) + loff_t pos, size_t count) { int rc = count; int req_firm_rc = 0; @@ -621,25 +607,49 @@ static ssize_t write_rbu_image_type(struct kobject *kobj, char *buffer, return rc; } +static ssize_t read_rbu_packet_size(struct kobject *kobj, char *buffer, + loff_t pos, size_t count) +{ + int size = 0; + if (!pos) { + spin_lock(&rbu_data.lock); + size = sprintf(buffer, "%lu\n", rbu_data.packetsize); + spin_unlock(&rbu_data.lock); + } + return size; +} + +static ssize_t write_rbu_packet_size(struct kobject *kobj, char *buffer, + loff_t pos, size_t count) +{ + unsigned long temp; + spin_lock(&rbu_data.lock); + packet_empty_list(); + sscanf(buffer, "%lu", &temp); + if (temp < 0xffffffff) + rbu_data.packetsize = temp; + + spin_unlock(&rbu_data.lock); + return count; +} + static struct bin_attribute rbu_data_attr = { - .attr = { - .name = "data", - .owner = THIS_MODULE, - .mode = 0444, - }, + .attr = {.name = "data",.owner = THIS_MODULE,.mode = 0444}, .read = read_rbu_data, }; static struct bin_attribute rbu_image_type_attr = { - .attr = { - .name = "image_type", - .owner = THIS_MODULE, - .mode = 0644, - }, + .attr = {.name = "image_type",.owner = THIS_MODULE,.mode = 0644}, .read = read_rbu_image_type, .write = write_rbu_image_type, }; +static struct bin_attribute rbu_packet_size_attr = { + .attr = {.name = "packet_size",.owner = THIS_MODULE,.mode = 0644}, + .read = read_rbu_packet_size, + .write = write_rbu_packet_size, +}; + static int __init dcdrbu_init(void) { int rc = 0; @@ -657,6 +667,8 @@ static int __init dcdrbu_init(void) sysfs_create_bin_file(&rbu_device->dev.kobj, &rbu_data_attr); sysfs_create_bin_file(&rbu_device->dev.kobj, &rbu_image_type_attr); + sysfs_create_bin_file(&rbu_device->dev.kobj, + &rbu_packet_size_attr); rc = request_firmware_nowait(THIS_MODULE, FW_ACTION_NOHOTPLUG, "dell_rbu", &rbu_device->dev, &context, callbackfn_rbu); From 19cba8abd6ca09527c194864ae651db65cbacfe1 Mon Sep 17 00:00:00 2001 From: Latchesar Ionkov Date: Tue, 11 Oct 2005 08:29:03 -0700 Subject: [PATCH 25/59] [PATCH] v9fs: remove additional buffer allocation from v9fs_file_read and v9fs_file_write v9fs_file_read and v9fs_file_write use kmalloc to allocate buffers as big as the data buffer received as parameter. kmalloc cannot be used to allocate buffers bigger than 128K, so reading/writing data in chunks bigger than 128k fails. This patch reorganizes v9fs_file_read and v9fs_file_write to allocate only buffers as big as the maximum data that can be sent in one 9P message. Signed-off-by: Latchesar Ionkov Cc: Eric Van Hensbergen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/9p/vfs_file.c | 180 +++++++++++++++++------------------------------ 1 file changed, 66 insertions(+), 114 deletions(-) diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c index a4799e971d1c..bbc3cc63854f 100644 --- a/fs/9p/vfs_file.c +++ b/fs/9p/vfs_file.c @@ -175,16 +175,16 @@ static int v9fs_file_lock(struct file *filp, int cmd, struct file_lock *fl) } /** - * v9fs_read - read from a file (internal) + * v9fs_file_read - read from a file * @filep: file pointer to read * @data: data buffer to read data into * @count: size of buffer * @offset: offset at which to read data * */ - static ssize_t -v9fs_read(struct file *filp, char *buffer, size_t count, loff_t * offset) +v9fs_file_read(struct file *filp, char __user * data, size_t count, + loff_t * offset) { struct inode *inode = filp->f_dentry->d_inode; struct v9fs_session_info *v9ses = v9fs_inode2v9ses(inode); @@ -194,6 +194,7 @@ v9fs_read(struct file *filp, char *buffer, size_t count, loff_t * offset) int rsize = 0; int result = 0; int total = 0; + int n; dprintk(DEBUG_VFS, "\n"); @@ -216,10 +217,15 @@ v9fs_read(struct file *filp, char *buffer, size_t count, loff_t * offset) } else *offset += result; - /* XXX - extra copy */ - memcpy(buffer, fcall->params.rread.data, result); + n = copy_to_user(data, fcall->params.rread.data, result); + if (n) { + dprintk(DEBUG_ERROR, "Problem copying to user %d\n", n); + kfree(fcall); + return -EFAULT; + } + count -= result; - buffer += result; + data += result; total += result; kfree(fcall); @@ -231,101 +237,6 @@ v9fs_read(struct file *filp, char *buffer, size_t count, loff_t * offset) return total; } -/** - * v9fs_file_read - read from a file - * @filep: file pointer to read - * @data: data buffer to read data into - * @count: size of buffer - * @offset: offset at which to read data - * - */ - -static ssize_t -v9fs_file_read(struct file *filp, char __user * data, size_t count, - loff_t * offset) -{ - int retval = -1; - int ret = 0; - char *buffer; - - buffer = kmalloc(count, GFP_KERNEL); - if (!buffer) - return -ENOMEM; - - retval = v9fs_read(filp, buffer, count, offset); - if (retval > 0) { - if ((ret = copy_to_user(data, buffer, retval)) != 0) { - dprintk(DEBUG_ERROR, "Problem copying to user %d\n", - ret); - retval = ret; - } - } - - kfree(buffer); - - return retval; -} - -/** - * v9fs_write - write to a file - * @filep: file pointer to write - * @data: data buffer to write data from - * @count: size of buffer - * @offset: offset at which to write data - * - */ - -static ssize_t -v9fs_write(struct file *filp, char *buffer, size_t count, loff_t * offset) -{ - struct inode *inode = filp->f_dentry->d_inode; - struct v9fs_session_info *v9ses = v9fs_inode2v9ses(inode); - struct v9fs_fid *v9fid = filp->private_data; - struct v9fs_fcall *fcall; - int fid = v9fid->fid; - int result = -EIO; - int rsize = 0; - int total = 0; - - dprintk(DEBUG_VFS, "data %p count %d offset %x\n", buffer, (int)count, - (int)*offset); - rsize = v9ses->maxdata - V9FS_IOHDRSZ; - if (v9fid->iounit != 0 && rsize > v9fid->iounit) - rsize = v9fid->iounit; - - dump_data(buffer, count); - - do { - if (count < rsize) - rsize = count; - - result = - v9fs_t_write(v9ses, fid, *offset, rsize, buffer, &fcall); - if (result < 0) { - eprintk(KERN_ERR, "error while writing: %s(%d)\n", - FCALL_ERROR(fcall), result); - kfree(fcall); - return result; - } else - *offset += result; - - kfree(fcall); - - if (result != rsize) { - eprintk(KERN_ERR, - "short write: v9fs_t_write returned %d\n", - result); - break; - } - - count -= result; - buffer += result; - total += result; - } while (count); - - return total; -} - /** * v9fs_file_write - write to a file * @filep: file pointer to write @@ -339,24 +250,65 @@ static ssize_t v9fs_file_write(struct file *filp, const char __user * data, size_t count, loff_t * offset) { - int ret = -1; - char *buffer; + struct inode *inode = filp->f_dentry->d_inode; + struct v9fs_session_info *v9ses = v9fs_inode2v9ses(inode); + struct v9fs_fid *v9fid = filp->private_data; + struct v9fs_fcall *fcall; + int fid = v9fid->fid; + int result = -EIO; + int rsize = 0; + int total = 0; + char *buf; - buffer = kmalloc(count, GFP_KERNEL); - if (buffer == NULL) + dprintk(DEBUG_VFS, "data %p count %d offset %x\n", data, (int)count, + (int)*offset); + rsize = v9ses->maxdata - V9FS_IOHDRSZ; + if (v9fid->iounit != 0 && rsize > v9fid->iounit) + rsize = v9fid->iounit; + + buf = kmalloc(v9ses->maxdata - V9FS_IOHDRSZ, GFP_KERNEL); + if (!buf) return -ENOMEM; - ret = copy_from_user(buffer, data, count); - if (ret) { - dprintk(DEBUG_ERROR, "Problem copying from user\n"); - ret = -EFAULT; - } else { - ret = v9fs_write(filp, buffer, count, offset); - } + do { + if (count < rsize) + rsize = count; - kfree(buffer); + result = copy_from_user(buf, data, rsize); + if (result) { + dprintk(DEBUG_ERROR, "Problem copying from user\n"); + kfree(buf); + return -EFAULT; + } - return ret; + dump_data(buf, rsize); + result = v9fs_t_write(v9ses, fid, *offset, rsize, buf, &fcall); + if (result < 0) { + eprintk(KERN_ERR, "error while writing: %s(%d)\n", + FCALL_ERROR(fcall), result); + kfree(fcall); + kfree(buf); + return result; + } else + *offset += result; + + kfree(fcall); + fcall = NULL; + + if (result != rsize) { + eprintk(KERN_ERR, + "short write: v9fs_t_write returned %d\n", + result); + break; + } + + count -= result; + data += result; + total += result; + } while (count); + + kfree(buf); + return total; } struct file_operations v9fs_file_operations = { From 22c1ea44f0d33eda532883858b6cdabc5f265b66 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Tue, 11 Oct 2005 08:29:05 -0700 Subject: [PATCH 26/59] [PATCH] nfsacl: Solaris VxFS compatibility fix Here is a compatibility fix between Linux and Solaris when used with VxFS filesystems: Solaris usually accepts acl entries in any order, but with VxFS it replies with NFSERR_INVAL when it sees a four-entry acl that is not in canonical form. It may also fail with other non-canonical acls -- I can't tell, because that case never triggers: We only send non-canonical acls when we fake up an ACL_MASK entry. Instead of adding fake ACL_MASK entries at the end, inserting them in the correct position makes Solaris+VxFS happy. The Linux client and server sides don't care about entry order. The three-entry-acl special case in which we need a fake ACL_MASK entry was handled in xdr_nfsace_encode. The patch moves this into nfsacl_encode. Signed-off-by: Andreas Gruenbacher Acked-by: Trond Myklebust Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nfs_common/nfsacl.c | 70 +++++++++++++++++++++--------------------- 1 file changed, 35 insertions(+), 35 deletions(-) diff --git a/fs/nfs_common/nfsacl.c b/fs/nfs_common/nfsacl.c index 251e5a1bb1c4..0c2be8c0307d 100644 --- a/fs/nfs_common/nfsacl.c +++ b/fs/nfs_common/nfsacl.c @@ -48,43 +48,26 @@ xdr_nfsace_encode(struct xdr_array2_desc *desc, void *elem) (struct nfsacl_encode_desc *) desc; u32 *p = (u32 *) elem; - if (nfsacl_desc->count < nfsacl_desc->acl->a_count) { - struct posix_acl_entry *entry = - &nfsacl_desc->acl->a_entries[nfsacl_desc->count++]; + struct posix_acl_entry *entry = + &nfsacl_desc->acl->a_entries[nfsacl_desc->count++]; - *p++ = htonl(entry->e_tag | nfsacl_desc->typeflag); - switch(entry->e_tag) { - case ACL_USER_OBJ: - *p++ = htonl(nfsacl_desc->uid); - break; - case ACL_GROUP_OBJ: - *p++ = htonl(nfsacl_desc->gid); - break; - case ACL_USER: - case ACL_GROUP: - *p++ = htonl(entry->e_id); - break; - default: /* Solaris depends on that! */ - *p++ = 0; - break; - } - *p++ = htonl(entry->e_perm & S_IRWXO); - } else { - const struct posix_acl_entry *pa, *pe; - int group_obj_perm = ACL_READ|ACL_WRITE|ACL_EXECUTE; - - FOREACH_ACL_ENTRY(pa, nfsacl_desc->acl, pe) { - if (pa->e_tag == ACL_GROUP_OBJ) { - group_obj_perm = pa->e_perm & S_IRWXO; - break; - } - } - /* fake up ACL_MASK entry */ - *p++ = htonl(ACL_MASK | nfsacl_desc->typeflag); - *p++ = htonl(0); - *p++ = htonl(group_obj_perm); + *p++ = htonl(entry->e_tag | nfsacl_desc->typeflag); + switch(entry->e_tag) { + case ACL_USER_OBJ: + *p++ = htonl(nfsacl_desc->uid); + break; + case ACL_GROUP_OBJ: + *p++ = htonl(nfsacl_desc->gid); + break; + case ACL_USER: + case ACL_GROUP: + *p++ = htonl(entry->e_id); + break; + default: /* Solaris depends on that! */ + *p++ = 0; + break; } - + *p++ = htonl(entry->e_perm & S_IRWXO); return 0; } @@ -105,11 +88,28 @@ nfsacl_encode(struct xdr_buf *buf, unsigned int base, struct inode *inode, .gid = inode->i_gid, }; int err; + struct posix_acl *acl2 = NULL; if (entries > NFS_ACL_MAX_ENTRIES || xdr_encode_word(buf, base, entries)) return -EINVAL; + if (encode_entries && acl && acl->a_count == 3) { + /* Fake up an ACL_MASK entry. */ + acl2 = posix_acl_alloc(4, GFP_KERNEL); + if (!acl2) + return -ENOMEM; + /* Insert entries in canonical order: other orders seem + to confuse Solaris VxFS. */ + acl2->a_entries[0] = acl->a_entries[0]; /* ACL_USER_OBJ */ + acl2->a_entries[1] = acl->a_entries[1]; /* ACL_GROUP_OBJ */ + acl2->a_entries[2] = acl->a_entries[1]; /* ACL_MASK */ + acl2->a_entries[2].e_tag = ACL_MASK; + acl2->a_entries[3] = acl->a_entries[2]; /* ACL_OTHER */ + nfsacl_desc.acl = acl2; + } err = xdr_encode_array2(buf, base + 4, &nfsacl_desc.desc); + if (acl2) + posix_acl_release(acl2); if (!err) err = 8 + nfsacl_desc.desc.elem_size * nfsacl_desc.desc.array_len; From 1bef40032992320dd25a266fc166bfb8fa3f2f59 Mon Sep 17 00:00:00 2001 From: Suzuki Date: Tue, 11 Oct 2005 08:29:06 -0700 Subject: [PATCH 27/59] [PATCH] madvise: Avoid returning error code -EBADF for anonymous mappings Revert this recent correctness change: Douglas Crosher reported that it broke an existing application, and that madvise() works without error on anonymous mappings on Solaris. This means that madvise() will remain non-standards-compliant: we should return -EBADF for all requests against non-file-backed vma's, but Linux only does this for MADV_WILLNEED requests. Signed-off-by: Suzuki K P Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/madvise.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/mm/madvise.c b/mm/madvise.c index 4454936f87d1..20e075d1c64c 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -83,6 +83,9 @@ static long madvise_willneed(struct vm_area_struct * vma, { struct file *file = vma->vm_file; + if (!file) + return -EBADF; + if (file->f_mapping->a_ops->get_xip_page) { /* no bad return value, but ignore advice */ return 0; @@ -141,11 +144,7 @@ static long madvise_vma(struct vm_area_struct *vma, struct vm_area_struct **prev, unsigned long start, unsigned long end, int behavior) { - struct file *filp = vma->vm_file; - long error = -EBADF; - - if (!filp) - goto out; + long error; switch (behavior) { case MADV_NORMAL: @@ -166,8 +165,6 @@ madvise_vma(struct vm_area_struct *vma, struct vm_area_struct **prev, error = -EINVAL; break; } - -out: return error; } From a0c111c631e7ab4abd68920debd44259160812ef Mon Sep 17 00:00:00 2001 From: Paolo Galtieri Date: Tue, 11 Oct 2005 08:29:07 -0700 Subject: [PATCH 28/59] [PATCH] ppc highmem fix I've noticed that the calculations for seg_size and nr_segs in __dma_sync_page_highmem() (arch/ppc/kernel/dma-mapping.c) are wrong. The incorrect calculations can result in either an oops or a panic when running fsck depending on the size of the partition. The problem with the seg_size calculation is that it can result in a negative number if size is offset > size. The problem with the nr_segs caculation is returns the wrong number of segments, e.g. it returns 1 when size is 200 and offset is 4095, when it should return 2 or more. Acked-by: Benjamin Herrenschmidt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/ppc/kernel/dma-mapping.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/ppc/kernel/dma-mapping.c b/arch/ppc/kernel/dma-mapping.c index b566d982806c..8edee806dae7 100644 --- a/arch/ppc/kernel/dma-mapping.c +++ b/arch/ppc/kernel/dma-mapping.c @@ -401,10 +401,10 @@ EXPORT_SYMBOL(__dma_sync); static inline void __dma_sync_page_highmem(struct page *page, unsigned long offset, size_t size, int direction) { - size_t seg_size = min((size_t)PAGE_SIZE, size) - offset; + size_t seg_size = min((size_t)(PAGE_SIZE - offset), size); size_t cur_size = seg_size; unsigned long flags, start, seg_offset = offset; - int nr_segs = PAGE_ALIGN(size + (PAGE_SIZE - offset))/PAGE_SIZE; + int nr_segs = 1 + ((size - seg_size) + PAGE_SIZE - 1)/PAGE_SIZE; int seg_nr = 0; local_irq_save(flags); From 6de505173e24e76bb33a2595312e0c2b44d49e58 Mon Sep 17 00:00:00 2001 From: "akpm@osdl.org" Date: Tue, 11 Oct 2005 08:29:08 -0700 Subject: [PATCH 29/59] [PATCH] binfmt_elf bss padding fix Nir Tzachar points out that if an ELF file specifies a zero-length bss at a whacky address, we cannot load that binary because padzero() tries to zero out the end of the page at the whacky address, and that may not be writeable. See also http://bugzilla.kernel.org/show_bug.cgi?id=5411 So teach load_elf_binary() to skip the bss settng altogether if the elf file has a zero-length bss segment. Cc: Roland McGrath Cc: Daniel Jacobowitz Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/binfmt_elf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 7976a238f0a3..d4b15576e584 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -905,7 +905,7 @@ static int load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs) send_sig(SIGKILL, current, 0); goto out_free_dentry; } - if (padzero(elf_bss)) { + if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) { send_sig(SIGSEGV, current, 0); retval = -EFAULT; /* Nobody gets to see this, but.. */ goto out_free_dentry; From 9de11aab1c8fd87da7e1fb435ce0ff26bacd7909 Mon Sep 17 00:00:00 2001 From: Hirokazu Takata Date: Tue, 11 Oct 2005 08:29:09 -0700 Subject: [PATCH 30/59] [PATCH] m32r: trap handler code for illegal traps This patch prevents illegal traps from causing m32r kernel's infinite loop execution. Signed-off-by: Naoto Sugai Signed-off-by: Hirokazu Takata Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/m32r/kernel/entry.S | 9 +++++++++ arch/m32r/kernel/traps.c | 33 ++++++++++++++++----------------- 2 files changed, 25 insertions(+), 17 deletions(-) diff --git a/arch/m32r/kernel/entry.S b/arch/m32r/kernel/entry.S index dddbf6b5ed2c..85920fb8d08c 100644 --- a/arch/m32r/kernel/entry.S +++ b/arch/m32r/kernel/entry.S @@ -681,6 +681,15 @@ ENTRY(debug_trap) bl do_debug_trap bra error_code +ENTRY(ill_trap) + /* void ill_trap(void) */ + SWITCH_TO_KERNEL_STACK + SAVE_ALL + ldi r1, #0 ; error_code ; FIXME + mv r0, sp ; pt_regs + bl do_ill_trap + bra error_code + /* Cache flushing handler */ ENTRY(cache_flushing_handler) diff --git a/arch/m32r/kernel/traps.c b/arch/m32r/kernel/traps.c index 01922271d17e..5fe8ed6d62dc 100644 --- a/arch/m32r/kernel/traps.c +++ b/arch/m32r/kernel/traps.c @@ -5,8 +5,6 @@ * Hitoshi Yamamoto */ -/* $Id$ */ - /* * 'traps.c' handles hardware traps and faults after we have saved some * state in 'entry.S'. @@ -35,6 +33,7 @@ asmlinkage void ei_handler(void); asmlinkage void rie_handler(void); asmlinkage void debug_trap(void); asmlinkage void cache_flushing_handler(void); +asmlinkage void ill_trap(void); #ifdef CONFIG_SMP extern void smp_reschedule_interrupt(void); @@ -77,22 +76,22 @@ void set_eit_vector_entries(void) eit_vector[5] = BRA_INSN(default_eit_handler, 5); eit_vector[8] = BRA_INSN(rie_handler, 8); eit_vector[12] = BRA_INSN(alignment_check, 12); - eit_vector[16] = 0xff000000UL; + eit_vector[16] = BRA_INSN(ill_trap, 16); eit_vector[17] = BRA_INSN(debug_trap, 17); eit_vector[18] = BRA_INSN(system_call, 18); - eit_vector[19] = 0xff000000UL; - eit_vector[20] = 0xff000000UL; - eit_vector[21] = 0xff000000UL; - eit_vector[22] = 0xff000000UL; - eit_vector[23] = 0xff000000UL; - eit_vector[24] = 0xff000000UL; - eit_vector[25] = 0xff000000UL; - eit_vector[26] = 0xff000000UL; - eit_vector[27] = 0xff000000UL; + eit_vector[19] = BRA_INSN(ill_trap, 19); + eit_vector[20] = BRA_INSN(ill_trap, 20); + eit_vector[21] = BRA_INSN(ill_trap, 21); + eit_vector[22] = BRA_INSN(ill_trap, 22); + eit_vector[23] = BRA_INSN(ill_trap, 23); + eit_vector[24] = BRA_INSN(ill_trap, 24); + eit_vector[25] = BRA_INSN(ill_trap, 25); + eit_vector[26] = BRA_INSN(ill_trap, 26); + eit_vector[27] = BRA_INSN(ill_trap, 27); eit_vector[28] = BRA_INSN(cache_flushing_handler, 28); - eit_vector[29] = 0xff000000UL; - eit_vector[30] = 0xff000000UL; - eit_vector[31] = 0xff000000UL; + eit_vector[29] = BRA_INSN(ill_trap, 29); + eit_vector[30] = BRA_INSN(ill_trap, 30); + eit_vector[31] = BRA_INSN(ill_trap, 31); eit_vector[32] = BRA_INSN(ei_handler, 32); eit_vector[64] = BRA_INSN(pie_handler, 64); #ifdef CONFIG_MMU @@ -286,7 +285,8 @@ asmlinkage void do_##name(struct pt_regs * regs, long error_code) \ DO_ERROR( 1, SIGTRAP, "debug trap", debug_trap) DO_ERROR_INFO(0x20, SIGILL, "reserved instruction ", rie_handler, ILL_ILLOPC, regs->bpc) -DO_ERROR_INFO(0x100, SIGILL, "privilege instruction", pie_handler, ILL_PRVOPC, regs->bpc) +DO_ERROR_INFO(0x100, SIGILL, "privileged instruction", pie_handler, ILL_PRVOPC, regs->bpc) +DO_ERROR_INFO(-1, SIGILL, "illegal trap", ill_trap, ILL_ILLTRP, regs->bpc) extern int handle_unaligned_access(unsigned long, struct pt_regs *); @@ -329,4 +329,3 @@ asmlinkage void do_alignment_check(struct pt_regs *regs, long error_code) set_fs(oldfs); } } - From d3089792f6ee38cdc9e254a7cb2f8c8d7f38c20d Mon Sep 17 00:00:00 2001 From: Michael Krufky Date: Tue, 11 Oct 2005 09:28:24 -0700 Subject: [PATCH 31/59] [PATCH] V4L: Enable s-video input on DViCO FusionHDTV5 Lite * bttv-cards.c: - Enable S-Video input on DViCO FusionHDTV5 Lite Signed-off-by: Michael Krufky Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/media/video/bttv-cards.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/media/video/bttv-cards.c b/drivers/media/video/bttv-cards.c index 6c332800d6ab..0881a17d5226 100644 --- a/drivers/media/video/bttv-cards.c +++ b/drivers/media/video/bttv-cards.c @@ -2393,10 +2393,10 @@ struct tvcard bttv_tvcards[] = { .tuner = 0, .tuner_type = TUNER_LG_TDVS_H062F, .tuner_addr = ADDR_UNSET, - .video_inputs = 2, + .video_inputs = 3, .audio_inputs = 1, .svhs = 2, - .muxsel = { 2, 3 }, + .muxsel = { 2, 3, 1 }, .gpiomask = 0x00e00007, .audiomux = { 0x00400005, 0, 0x00000001, 0, 0x00c00007, 0 }, .no_msp34xx = 1, From 9149ccfa3571eaa4a4b444777d67fc4ed3ebcf27 Mon Sep 17 00:00:00 2001 From: Peter Bergner Date: Tue, 11 Oct 2005 09:28:24 -0700 Subject: [PATCH 32/59] [PATCH] ppc64: Add R_PPC64_TOC16 module reloc Newer gcc's are generating this relocation, so the module loader needs to handle it. Signed-off-by: Peter Bergner Signed-off-by: Anton Blanchard Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/ppc64/kernel/module.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/arch/ppc64/kernel/module.c b/arch/ppc64/kernel/module.c index c683bf88e690..928b8581fcb0 100644 --- a/arch/ppc64/kernel/module.c +++ b/arch/ppc64/kernel/module.c @@ -341,6 +341,19 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, *(unsigned long *)location = my_r2(sechdrs, me); break; + case R_PPC64_TOC16: + /* Subtact TOC pointer */ + value -= my_r2(sechdrs, me); + if (value + 0x8000 > 0xffff) { + printk("%s: bad TOC16 relocation (%lu)\n", + me->name, value); + return -ENOEXEC; + } + *((uint16_t *) location) + = (*((uint16_t *) location) & ~0xffff) + | (value & 0xffff); + break; + case R_PPC64_TOC16_DS: /* Subtact TOC pointer */ value -= my_r2(sechdrs, me); From f5154a98a1931641f0448f6512294a15279110d7 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Tue, 11 Oct 2005 19:16:26 +0100 Subject: [PATCH 33/59] [PATCH] Don't map the same page too much Refuse to install a page into a mapping if the mapping count is already ridiculously large. You probably cannot trigger this on 32-bit architectures, but on a 64-bit setup we should protect against it. Signed-off-by: Hugh Dickins Signed-off-by: Linus Torvalds --- mm/fremap.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/mm/fremap.c b/mm/fremap.c index 3235fb77c133..ab23a0673c35 100644 --- a/mm/fremap.c +++ b/mm/fremap.c @@ -89,6 +89,9 @@ int install_page(struct mm_struct *mm, struct vm_area_struct *vma, size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; if (!page->mapping || page->index >= size) goto err_unlock; + err = -ENOMEM; + if (page_mapcount(page) > INT_MAX/2) + goto err_unlock; zap_pte(mm, vma, addr, pte); From b1b510aa284af1908d5d369d52f7dae16aaabd71 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 11 Oct 2005 15:45:16 -0700 Subject: [PATCH 34/59] [SPARC64]: Fix net booting on Ultra5 We were not doing alignment properly when remapping the kernel image. What we want is a 4MB aligned physical address to map at KERNBASE. Mistakedly we were 4MB aligning the virtual address where the kernel initially sits, that's wrong. Instead, we should PAGE align the virtual address, then 4MB align the physical address result the prom gives to us. Signed-off-by: David S. Miller --- arch/sparc64/kernel/head.S | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/sparc64/kernel/head.S b/arch/sparc64/kernel/head.S index f1dcdf8f7433..4c942f71184d 100644 --- a/arch/sparc64/kernel/head.S +++ b/arch/sparc64/kernel/head.S @@ -191,8 +191,9 @@ prom_boot_mapping_phys_low: stx %l3, [%sp + 2047 + 128 + 0x10] ! num_rets, 5 stx %l2, [%sp + 2047 + 128 + 0x18] ! arg1: "translate" stx %l5, [%sp + 2047 + 128 + 0x20] ! arg2: prom_mmu_ihandle_cache - srlx %l0, 22, %l3 - sllx %l3, 22, %l3 + /* PAGE align */ + srlx %l0, 13, %l3 + sllx %l3, 13, %l3 stx %l3, [%sp + 2047 + 128 + 0x28] ! arg3: vaddr, our PC stx %g0, [%sp + 2047 + 128 + 0x30] ! res1 stx %g0, [%sp + 2047 + 128 + 0x38] ! res2 @@ -211,6 +212,9 @@ prom_boot_mapping_phys_low: ldx [%sp + 2047 + 128 + 0x48], %l2 ! physaddr high stx %l2, [%l4 + 0x0] ldx [%sp + 2047 + 128 + 0x50], %l3 ! physaddr low + /* 4MB align */ + srlx %l3, 22, %l3 + sllx %l3, 22, %l3 stx %l3, [%l4 + 0x8] /* Leave service as-is, "call-method" */ From 91acb21f084aa244f26839406ae7ed8aa3668058 Mon Sep 17 00:00:00 2001 From: Jeff Dike Date: Mon, 10 Oct 2005 23:10:32 -0400 Subject: [PATCH 35/59] [PATCH] uml: revert block driver use of host AIO The patch to use host AIO support that I submitted early after 2.6.13 exposed some problems in the block driver. I have fixes for these, but am not comfortable putting them into 2.6.14 at this late date. So, this patch reverts the use of host AIO. I will resubmit the original patch, plus fixes to the driver after 2.6.14 in order to get a reasonable amount of testing before they're exposed to the general public. Signed-off-by: Jeff Dike Signed-off-by: Linus Torvalds --- arch/um/drivers/Makefile | 2 +- arch/um/drivers/ubd_kern.c | 560 +++++++++++++++++++++---------------- arch/um/drivers/ubd_user.c | 75 +++++ arch/um/include/aio.h | 18 +- arch/um/os-Linux/aio.c | 207 +++++++------- 5 files changed, 486 insertions(+), 376 deletions(-) create mode 100644 arch/um/drivers/ubd_user.c diff --git a/arch/um/drivers/Makefile b/arch/um/drivers/Makefile index 783e18cae090..de17d4c6e02d 100644 --- a/arch/um/drivers/Makefile +++ b/arch/um/drivers/Makefile @@ -13,7 +13,7 @@ mcast-objs := mcast_kern.o mcast_user.o net-objs := net_kern.o net_user.o mconsole-objs := mconsole_kern.o mconsole_user.o hostaudio-objs := hostaudio_kern.o -ubd-objs := ubd_kern.o +ubd-objs := ubd_kern.o ubd_user.o port-objs := port_kern.o port_user.o harddog-objs := harddog_kern.o harddog_user.o diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c index e77a38da4350..f73134333f64 100644 --- a/arch/um/drivers/ubd_kern.c +++ b/arch/um/drivers/ubd_kern.c @@ -35,7 +35,6 @@ #include "linux/blkpg.h" #include "linux/genhd.h" #include "linux/spinlock.h" -#include "asm/atomic.h" #include "asm/segment.h" #include "asm/uaccess.h" #include "asm/irq.h" @@ -54,21 +53,20 @@ #include "mem.h" #include "mem_kern.h" #include "cow.h" -#include "aio.h" enum ubd_req { UBD_READ, UBD_WRITE }; struct io_thread_req { - enum aio_type op; + enum ubd_req op; int fds[2]; unsigned long offsets[2]; unsigned long long offset; unsigned long length; char *buffer; int sectorsize; - int bitmap_offset; - long bitmap_start; - long bitmap_end; + unsigned long sector_mask; + unsigned long long cow_offset; + unsigned long bitmap_words[2]; int error; }; @@ -82,31 +80,28 @@ extern int create_cow_file(char *cow_file, char *backing_file, unsigned long *bitmap_len_out, int *data_offset_out); extern int read_cow_bitmap(int fd, void *buf, int offset, int len); -extern void do_io(struct io_thread_req *req, struct request *r, - unsigned long *bitmap); +extern void do_io(struct io_thread_req *req); -static inline int ubd_test_bit(__u64 bit, void *data) +static inline int ubd_test_bit(__u64 bit, unsigned char *data) { - unsigned char *buffer = data; __u64 n; int bits, off; - bits = sizeof(buffer[0]) * 8; + bits = sizeof(data[0]) * 8; n = bit / bits; off = bit % bits; - return((buffer[n] & (1 << off)) != 0); + return((data[n] & (1 << off)) != 0); } -static inline void ubd_set_bit(__u64 bit, void *data) +static inline void ubd_set_bit(__u64 bit, unsigned char *data) { - unsigned char *buffer = data; __u64 n; int bits, off; - bits = sizeof(buffer[0]) * 8; + bits = sizeof(data[0]) * 8; n = bit / bits; off = bit % bits; - buffer[n] |= (1 << off); + data[n] |= (1 << off); } /*End stuff from ubd_user.h*/ @@ -115,6 +110,8 @@ static inline void ubd_set_bit(__u64 bit, void *data) static DEFINE_SPINLOCK(ubd_io_lock); static DEFINE_SPINLOCK(ubd_lock); +static void (*do_ubd)(void); + static int ubd_open(struct inode * inode, struct file * filp); static int ubd_release(struct inode * inode, struct file * file); static int ubd_ioctl(struct inode * inode, struct file * file, @@ -161,8 +158,6 @@ struct cow { int data_offset; }; -#define MAX_SG 64 - struct ubd { char *file; int count; @@ -173,7 +168,6 @@ struct ubd { int no_cow; struct cow cow; struct platform_device pdev; - struct scatterlist sg[MAX_SG]; }; #define DEFAULT_COW { \ @@ -466,114 +460,81 @@ __uml_help(fakehd, ); static void do_ubd_request(request_queue_t * q); -static int in_ubd; + +/* Only changed by ubd_init, which is an initcall. */ +int thread_fd = -1; /* Changed by ubd_handler, which is serialized because interrupts only * happen on CPU 0. */ int intr_count = 0; -static void ubd_end_request(struct request *req, int bytes, int uptodate) -{ - if (!end_that_request_first(req, uptodate, bytes >> 9)) { - add_disk_randomness(req->rq_disk); - end_that_request_last(req); - } -} - /* call ubd_finish if you need to serialize */ -static void __ubd_finish(struct request *req, int bytes) +static void __ubd_finish(struct request *req, int error) { - if(bytes < 0){ - ubd_end_request(req, 0, 0); - return; - } + int nsect; - ubd_end_request(req, bytes, 1); + if(error){ + end_request(req, 0); + return; + } + nsect = req->current_nr_sectors; + req->sector += nsect; + req->buffer += nsect << 9; + req->errors = 0; + req->nr_sectors -= nsect; + req->current_nr_sectors = 0; + end_request(req, 1); } -static inline void ubd_finish(struct request *req, int bytes) +static inline void ubd_finish(struct request *req, int error) { - spin_lock(&ubd_io_lock); - __ubd_finish(req, bytes); - spin_unlock(&ubd_io_lock); + spin_lock(&ubd_io_lock); + __ubd_finish(req, error); + spin_unlock(&ubd_io_lock); } -struct bitmap_io { - atomic_t count; - struct aio_context aio; -}; +/* Called without ubd_io_lock held */ +static void ubd_handler(void) +{ + struct io_thread_req req; + struct request *rq = elv_next_request(ubd_queue); + int n; -struct ubd_aio { - struct aio_context aio; - struct request *req; - int len; - struct bitmap_io *bitmap; - void *bitmap_buf; -}; - -static int ubd_reply_fd = -1; + do_ubd = NULL; + intr_count++; + n = os_read_file(thread_fd, &req, sizeof(req)); + if(n != sizeof(req)){ + printk(KERN_ERR "Pid %d - spurious interrupt in ubd_handler, " + "err = %d\n", os_getpid(), -n); + spin_lock(&ubd_io_lock); + end_request(rq, 0); + spin_unlock(&ubd_io_lock); + return; + } + + ubd_finish(rq, req.error); + reactivate_fd(thread_fd, UBD_IRQ); + do_ubd_request(ubd_queue); +} static irqreturn_t ubd_intr(int irq, void *dev, struct pt_regs *unused) { - struct aio_thread_reply reply; - struct ubd_aio *aio; - struct request *req; - int err, n, fd = (int) (long) dev; - - while(1){ - err = os_read_file(fd, &reply, sizeof(reply)); - if(err == -EAGAIN) - break; - if(err < 0){ - printk("ubd_aio_handler - read returned err %d\n", - -err); - break; - } - - aio = container_of(reply.data, struct ubd_aio, aio); - n = reply.err; - - if(n == 0){ - req = aio->req; - req->nr_sectors -= aio->len >> 9; - - if((aio->bitmap != NULL) && - (atomic_dec_and_test(&aio->bitmap->count))){ - aio->aio = aio->bitmap->aio; - aio->len = 0; - kfree(aio->bitmap); - aio->bitmap = NULL; - submit_aio(&aio->aio); - } - else { - if((req->nr_sectors == 0) && - (aio->bitmap == NULL)){ - int len = req->hard_nr_sectors << 9; - ubd_finish(req, len); - } - - if(aio->bitmap_buf != NULL) - kfree(aio->bitmap_buf); - kfree(aio); - } - } - else if(n < 0){ - ubd_finish(aio->req, n); - if(aio->bitmap != NULL) - kfree(aio->bitmap); - if(aio->bitmap_buf != NULL) - kfree(aio->bitmap_buf); - kfree(aio); - } - } - reactivate_fd(fd, UBD_IRQ); - - do_ubd_request(ubd_queue); - + ubd_handler(); return(IRQ_HANDLED); } +/* Only changed by ubd_init, which is an initcall. */ +static int io_pid = -1; + +void kill_io_thread(void) +{ + if(io_pid != -1) + os_kill_process(io_pid, 1); +} + +__uml_exitcall(kill_io_thread); + static int ubd_file_size(struct ubd *dev, __u64 *size_out) { char *file; @@ -608,7 +569,7 @@ static int ubd_open_dev(struct ubd *dev) &dev->cow.data_offset, create_ptr); if((dev->fd == -ENOENT) && create_cow){ - dev->fd = create_cow_file(dev->file, dev->cow.file, + dev->fd = create_cow_file(dev->file, dev->cow.file, dev->openflags, 1 << 9, PAGE_SIZE, &dev->cow.bitmap_offset, &dev->cow.bitmap_len, @@ -870,10 +831,6 @@ int ubd_init(void) { int i; - ubd_reply_fd = init_aio_irq(UBD_IRQ, "ubd", ubd_intr); - if(ubd_reply_fd < 0) - printk("Setting up ubd AIO failed, err = %d\n", ubd_reply_fd); - devfs_mk_dir("ubd"); if (register_blkdev(MAJOR_NR, "ubd")) return -1; @@ -884,7 +841,6 @@ int ubd_init(void) return -1; } - blk_queue_max_hw_segments(ubd_queue, MAX_SG); if (fake_major != MAJOR_NR) { char name[sizeof("ubd_nnn\0")]; @@ -896,12 +852,40 @@ int ubd_init(void) driver_register(&ubd_driver); for (i = 0; i < MAX_DEV; i++) ubd_add(i); - return 0; } late_initcall(ubd_init); +int ubd_driver_init(void){ + unsigned long stack; + int err; + + /* Set by CONFIG_BLK_DEV_UBD_SYNC or ubd=sync.*/ + if(global_openflags.s){ + printk(KERN_INFO "ubd: Synchronous mode\n"); + /* Letting ubd=sync be like using ubd#s= instead of ubd#= is + * enough. So use anyway the io thread. */ + } + stack = alloc_stack(0, 0); + io_pid = start_io_thread(stack + PAGE_SIZE - sizeof(void *), + &thread_fd); + if(io_pid < 0){ + printk(KERN_ERR + "ubd : Failed to start I/O thread (errno = %d) - " + "falling back to synchronous I/O\n", -io_pid); + io_pid = -1; + return(0); + } + err = um_request_irq(UBD_IRQ, thread_fd, IRQ_READ, ubd_intr, + SA_INTERRUPT, "ubd", ubd_dev); + if(err != 0) + printk(KERN_ERR "um_request_irq failed - errno = %d\n", -err); + return(err); +} + +device_initcall(ubd_driver_init); + static int ubd_open(struct inode *inode, struct file *filp) { struct gendisk *disk = inode->i_bdev->bd_disk; @@ -939,55 +923,105 @@ static int ubd_release(struct inode * inode, struct file * file) return(0); } -static void cowify_bitmap(struct io_thread_req *req, unsigned long *bitmap) +static void cowify_bitmap(__u64 io_offset, int length, unsigned long *cow_mask, + __u64 *cow_offset, unsigned long *bitmap, + __u64 bitmap_offset, unsigned long *bitmap_words, + __u64 bitmap_len) { - __u64 sector = req->offset / req->sectorsize; - int i; + __u64 sector = io_offset >> 9; + int i, update_bitmap = 0; - for(i = 0; i < req->length / req->sectorsize; i++){ - if(ubd_test_bit(sector + i, bitmap)) - continue; + for(i = 0; i < length >> 9; i++){ + if(cow_mask != NULL) + ubd_set_bit(i, (unsigned char *) cow_mask); + if(ubd_test_bit(sector + i, (unsigned char *) bitmap)) + continue; - if(req->bitmap_start == -1) - req->bitmap_start = sector + i; - req->bitmap_end = sector + i + 1; + update_bitmap = 1; + ubd_set_bit(sector + i, (unsigned char *) bitmap); + } - ubd_set_bit(sector + i, bitmap); - } + if(!update_bitmap) + return; + + *cow_offset = sector / (sizeof(unsigned long) * 8); + + /* This takes care of the case where we're exactly at the end of the + * device, and *cow_offset + 1 is off the end. So, just back it up + * by one word. Thanks to Lynn Kerby for the fix and James McMechan + * for the original diagnosis. + */ + if(*cow_offset == ((bitmap_len + sizeof(unsigned long) - 1) / + sizeof(unsigned long) - 1)) + (*cow_offset)--; + + bitmap_words[0] = bitmap[*cow_offset]; + bitmap_words[1] = bitmap[*cow_offset + 1]; + + *cow_offset *= sizeof(unsigned long); + *cow_offset += bitmap_offset; +} + +static void cowify_req(struct io_thread_req *req, unsigned long *bitmap, + __u64 bitmap_offset, __u64 bitmap_len) +{ + __u64 sector = req->offset >> 9; + int i; + + if(req->length > (sizeof(req->sector_mask) * 8) << 9) + panic("Operation too long"); + + if(req->op == UBD_READ) { + for(i = 0; i < req->length >> 9; i++){ + if(ubd_test_bit(sector + i, (unsigned char *) bitmap)) + ubd_set_bit(i, (unsigned char *) + &req->sector_mask); + } + } + else cowify_bitmap(req->offset, req->length, &req->sector_mask, + &req->cow_offset, bitmap, bitmap_offset, + req->bitmap_words, bitmap_len); } /* Called with ubd_io_lock held */ -static int prepare_request(struct request *req, struct io_thread_req *io_req, - unsigned long long offset, int page_offset, - int len, struct page *page) +static int prepare_request(struct request *req, struct io_thread_req *io_req) { struct gendisk *disk = req->rq_disk; struct ubd *dev = disk->private_data; + __u64 offset; + int len; + + if(req->rq_status == RQ_INACTIVE) return(1); /* This should be impossible now */ if((rq_data_dir(req) == WRITE) && !dev->openflags.w){ printk("Write attempted on readonly ubd device %s\n", disk->disk_name); - ubd_end_request(req, 0, 0); + end_request(req, 0); return(1); } + offset = ((__u64) req->sector) << 9; + len = req->current_nr_sectors << 9; + io_req->fds[0] = (dev->cow.file != NULL) ? dev->cow.fd : dev->fd; io_req->fds[1] = dev->fd; + io_req->cow_offset = -1; io_req->offset = offset; io_req->length = len; io_req->error = 0; - io_req->op = (rq_data_dir(req) == READ) ? AIO_READ : AIO_WRITE; + io_req->sector_mask = 0; + + io_req->op = (rq_data_dir(req) == READ) ? UBD_READ : UBD_WRITE; io_req->offsets[0] = 0; io_req->offsets[1] = dev->cow.data_offset; - io_req->buffer = page_address(page) + page_offset; + io_req->buffer = req->buffer; io_req->sectorsize = 1 << 9; - io_req->bitmap_offset = dev->cow.bitmap_offset; - io_req->bitmap_start = -1; - io_req->bitmap_end = -1; - if((dev->cow.file != NULL) && (io_req->op == UBD_WRITE)) - cowify_bitmap(io_req, dev->cow.bitmap); + if(dev->cow.file != NULL) + cowify_req(io_req, dev->cow.bitmap, dev->cow.bitmap_offset, + dev->cow.bitmap_len); + return(0); } @@ -996,36 +1030,30 @@ static void do_ubd_request(request_queue_t *q) { struct io_thread_req io_req; struct request *req; - __u64 sector; - int err; + int err, n; - if(in_ubd) - return; - in_ubd = 1; - while((req = elv_next_request(q)) != NULL){ - struct gendisk *disk = req->rq_disk; - struct ubd *dev = disk->private_data; - int n, i; - - blkdev_dequeue_request(req); - - sector = req->sector; - n = blk_rq_map_sg(q, req, dev->sg); - - for(i = 0; i < n; i++){ - struct scatterlist *sg = &dev->sg[i]; - - err = prepare_request(req, &io_req, sector << 9, - sg->offset, sg->length, - sg->page); - if(err) - continue; - - sector += sg->length >> 9; - do_io(&io_req, req, dev->cow.bitmap); + if(thread_fd == -1){ + while((req = elv_next_request(q)) != NULL){ + err = prepare_request(req, &io_req); + if(!err){ + do_io(&io_req); + __ubd_finish(req, io_req.error); + } + } + } + else { + if(do_ubd || (req = elv_next_request(q)) == NULL) + return; + err = prepare_request(req, &io_req); + if(!err){ + do_ubd = ubd_handler; + n = os_write_file(thread_fd, (char *) &io_req, + sizeof(io_req)); + if(n != sizeof(io_req)) + printk("write to io thread failed, " + "errno = %d\n", -n); } } - in_ubd = 0; } static int ubd_ioctl(struct inode * inode, struct file * file, @@ -1241,95 +1269,131 @@ int create_cow_file(char *cow_file, char *backing_file, struct openflags flags, return(err); } -void do_io(struct io_thread_req *req, struct request *r, unsigned long *bitmap) +static int update_bitmap(struct io_thread_req *req) { - struct ubd_aio *aio; - struct bitmap_io *bitmap_io = NULL; - char *buf; - void *bitmap_buf = NULL; - unsigned long len, sector; - int nsectors, start, end, bit, err; - __u64 off; + int n; - if(req->bitmap_start != -1){ - /* Round up to the nearest word */ - int round = sizeof(unsigned long); - len = (req->bitmap_end - req->bitmap_start + - round * 8 - 1) / (round * 8); - len *= round; + if(req->cow_offset == -1) + return(0); - off = req->bitmap_start / (8 * round); - off *= round; + n = os_seek_file(req->fds[1], req->cow_offset); + if(n < 0){ + printk("do_io - bitmap lseek failed : err = %d\n", -n); + return(1); + } - bitmap_io = kmalloc(sizeof(*bitmap_io), GFP_KERNEL); - if(bitmap_io == NULL){ - printk("Failed to kmalloc bitmap IO\n"); - req->error = 1; - return; - } + n = os_write_file(req->fds[1], &req->bitmap_words, + sizeof(req->bitmap_words)); + if(n != sizeof(req->bitmap_words)){ + printk("do_io - bitmap update failed, err = %d fd = %d\n", -n, + req->fds[1]); + return(1); + } - bitmap_buf = kmalloc(len, GFP_KERNEL); - if(bitmap_buf == NULL){ - printk("do_io : kmalloc of bitmap chunk " - "failed\n"); - kfree(bitmap_io); - req->error = 1; - return; - } - memcpy(bitmap_buf, &bitmap[off / sizeof(bitmap[0])], len); - - *bitmap_io = ((struct bitmap_io) - { .count = ATOMIC_INIT(0), - .aio = INIT_AIO(AIO_WRITE, req->fds[1], - bitmap_buf, len, - req->bitmap_offset + off, - ubd_reply_fd) } ); - } - - nsectors = req->length / req->sectorsize; - start = 0; - end = nsectors; - bit = 0; - do { - if(bitmap != NULL){ - sector = req->offset / req->sectorsize; - bit = ubd_test_bit(sector + start, bitmap); - end = start; - while((end < nsectors) && - (ubd_test_bit(sector + end, bitmap) == bit)) - end++; - } - - off = req->offsets[bit] + req->offset + - start * req->sectorsize; - len = (end - start) * req->sectorsize; - buf = &req->buffer[start * req->sectorsize]; - - aio = kmalloc(sizeof(*aio), GFP_KERNEL); - if(aio == NULL){ - req->error = 1; - return; - } - - *aio = ((struct ubd_aio) - { .aio = INIT_AIO(req->op, req->fds[bit], buf, - len, off, ubd_reply_fd), - .len = len, - .req = r, - .bitmap = bitmap_io, - .bitmap_buf = bitmap_buf }); - - if(aio->bitmap != NULL) - atomic_inc(&aio->bitmap->count); - - err = submit_aio(&aio->aio); - if(err){ - printk("do_io - submit_aio failed, " - "err = %d\n", err); - req->error = 1; - return; - } - - start = end; - } while(start < nsectors); + return(0); } + +void do_io(struct io_thread_req *req) +{ + char *buf; + unsigned long len; + int n, nsectors, start, end, bit; + int err; + __u64 off; + + nsectors = req->length / req->sectorsize; + start = 0; + do { + bit = ubd_test_bit(start, (unsigned char *) &req->sector_mask); + end = start; + while((end < nsectors) && + (ubd_test_bit(end, (unsigned char *) + &req->sector_mask) == bit)) + end++; + + off = req->offset + req->offsets[bit] + + start * req->sectorsize; + len = (end - start) * req->sectorsize; + buf = &req->buffer[start * req->sectorsize]; + + err = os_seek_file(req->fds[bit], off); + if(err < 0){ + printk("do_io - lseek failed : err = %d\n", -err); + req->error = 1; + return; + } + if(req->op == UBD_READ){ + n = 0; + do { + buf = &buf[n]; + len -= n; + n = os_read_file(req->fds[bit], buf, len); + if (n < 0) { + printk("do_io - read failed, err = %d " + "fd = %d\n", -n, req->fds[bit]); + req->error = 1; + return; + } + } while((n < len) && (n != 0)); + if (n < len) memset(&buf[n], 0, len - n); + } else { + n = os_write_file(req->fds[bit], buf, len); + if(n != len){ + printk("do_io - write failed err = %d " + "fd = %d\n", -n, req->fds[bit]); + req->error = 1; + return; + } + } + + start = end; + } while(start < nsectors); + + req->error = update_bitmap(req); +} + +/* Changed in start_io_thread, which is serialized by being called only + * from ubd_init, which is an initcall. + */ +int kernel_fd = -1; + +/* Only changed by the io thread */ +int io_count = 0; + +int io_thread(void *arg) +{ + struct io_thread_req req; + int n; + + ignore_sigwinch_sig(); + while(1){ + n = os_read_file(kernel_fd, &req, sizeof(req)); + if(n != sizeof(req)){ + if(n < 0) + printk("io_thread - read failed, fd = %d, " + "err = %d\n", kernel_fd, -n); + else { + printk("io_thread - short read, fd = %d, " + "length = %d\n", kernel_fd, n); + } + continue; + } + io_count++; + do_io(&req); + n = os_write_file(kernel_fd, &req, sizeof(req)); + if(n != sizeof(req)) + printk("io_thread - write failed, fd = %d, err = %d\n", + kernel_fd, -n); + } +} + +/* + * Overrides for Emacs so that we follow Linus's tabbing style. + * Emacs will notice this stuff at the end of the file and automatically + * adjust the settings for this buffer only. This must remain at the end + * of the file. + * --------------------------------------------------------------------------- + * Local variables: + * c-file-style: "linux" + * End: + */ diff --git a/arch/um/drivers/ubd_user.c b/arch/um/drivers/ubd_user.c new file mode 100644 index 000000000000..b94d2bc4fe06 --- /dev/null +++ b/arch/um/drivers/ubd_user.c @@ -0,0 +1,75 @@ +/* + * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com) + * Copyright (C) 2001 Ridgerun,Inc (glonnon@ridgerun.com) + * Licensed under the GPL + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "asm/types.h" +#include "user_util.h" +#include "kern_util.h" +#include "user.h" +#include "ubd_user.h" +#include "os.h" +#include "cow.h" + +#include +#include + +void ignore_sigwinch_sig(void) +{ + signal(SIGWINCH, SIG_IGN); +} + +int start_io_thread(unsigned long sp, int *fd_out) +{ + int pid, fds[2], err; + + err = os_pipe(fds, 1, 1); + if(err < 0){ + printk("start_io_thread - os_pipe failed, err = %d\n", -err); + goto out; + } + + kernel_fd = fds[0]; + *fd_out = fds[1]; + + pid = clone(io_thread, (void *) sp, CLONE_FILES | CLONE_VM | SIGCHLD, + NULL); + if(pid < 0){ + printk("start_io_thread - clone failed : errno = %d\n", errno); + err = -errno; + goto out_close; + } + + return(pid); + + out_close: + os_close_file(fds[0]); + os_close_file(fds[1]); + kernel_fd = -1; + *fd_out = -1; + out: + return(err); +} + +/* + * Overrides for Emacs so that we follow Linus's tabbing style. + * Emacs will notice this stuff at the end of the file and automatically + * adjust the settings for this buffer only. This must remain at the end + * of the file. + * --------------------------------------------------------------------------- + * Local variables: + * c-file-style: "linux" + * End: + */ diff --git a/arch/um/include/aio.h b/arch/um/include/aio.h index 83f16877ab08..423bae9153f8 100644 --- a/arch/um/include/aio.h +++ b/arch/um/include/aio.h @@ -14,27 +14,15 @@ struct aio_thread_reply { }; struct aio_context { - enum aio_type type; - int fd; - void *data; - int len; - unsigned long long offset; int reply_fd; struct aio_context *next; }; -#define INIT_AIO(aio_type, aio_fd, aio_data, aio_len, aio_offset, \ - aio_reply_fd) \ - { .type = aio_type, \ - .fd = aio_fd, \ - .data = aio_data, \ - .len = aio_len, \ - .offset = aio_offset, \ - .reply_fd = aio_reply_fd } - #define INIT_AIO_CONTEXT { .reply_fd = -1, \ .next = NULL } -extern int submit_aio(struct aio_context *aio); +extern int submit_aio(enum aio_type type, int fd, char *buf, int len, + unsigned long long offset, int reply_fd, + struct aio_context *aio); #endif diff --git a/arch/um/os-Linux/aio.c b/arch/um/os-Linux/aio.c index f6e64026f995..41cfb0944201 100644 --- a/arch/um/os-Linux/aio.c +++ b/arch/um/os-Linux/aio.c @@ -6,7 +6,6 @@ #include #include #include -#include #include #include #include @@ -17,31 +16,18 @@ #include "user.h" #include "mode.h" +struct aio_thread_req { + enum aio_type type; + int io_fd; + unsigned long long offset; + char *buf; + int len; + struct aio_context *aio; +}; + static int aio_req_fd_r = -1; static int aio_req_fd_w = -1; -static int update_aio(struct aio_context *aio, int res) -{ - if(res < 0) - aio->len = res; - else if((res == 0) && (aio->type == AIO_READ)){ - /* This is the EOF case - we have hit the end of the file - * and it ends in a partial block, so we fill the end of - * the block with zeros and claim success. - */ - memset(aio->data, 0, aio->len); - aio->len = 0; - } - else if(res > 0){ - aio->len -= res; - aio->data += res; - aio->offset += res; - return aio->len; - } - - return 0; -} - #if defined(HAVE_AIO_ABI) #include @@ -80,7 +66,8 @@ static long io_getevents(aio_context_t ctx_id, long min_nr, long nr, * that it now backs the mmapped area. */ -static int do_aio(aio_context_t ctx, struct aio_context *aio) +static int do_aio(aio_context_t ctx, enum aio_type type, int fd, char *buf, + int len, unsigned long long offset, struct aio_context *aio) { struct iocb iocb, *iocbp = &iocb; char c; @@ -88,39 +75,40 @@ static int do_aio(aio_context_t ctx, struct aio_context *aio) iocb = ((struct iocb) { .aio_data = (unsigned long) aio, .aio_reqprio = 0, - .aio_fildes = aio->fd, - .aio_buf = (unsigned long) aio->data, - .aio_nbytes = aio->len, - .aio_offset = aio->offset, + .aio_fildes = fd, + .aio_buf = (unsigned long) buf, + .aio_nbytes = len, + .aio_offset = offset, .aio_reserved1 = 0, .aio_reserved2 = 0, .aio_reserved3 = 0 }); - switch(aio->type){ + switch(type){ case AIO_READ: iocb.aio_lio_opcode = IOCB_CMD_PREAD; + err = io_submit(ctx, 1, &iocbp); break; case AIO_WRITE: iocb.aio_lio_opcode = IOCB_CMD_PWRITE; + err = io_submit(ctx, 1, &iocbp); break; case AIO_MMAP: iocb.aio_lio_opcode = IOCB_CMD_PREAD; iocb.aio_buf = (unsigned long) &c; iocb.aio_nbytes = sizeof(c); + err = io_submit(ctx, 1, &iocbp); break; default: - printk("Bogus op in do_aio - %d\n", aio->type); + printk("Bogus op in do_aio - %d\n", type); err = -EINVAL; - goto out; + break; } - err = io_submit(ctx, 1, &iocbp); if(err > 0) err = 0; else err = -errno; - out: return err; } @@ -129,9 +117,8 @@ static aio_context_t ctx = 0; static int aio_thread(void *arg) { struct aio_thread_reply reply; - struct aio_context *aio; struct io_event event; - int err, n; + int err, n, reply_fd; signal(SIGWINCH, SIG_IGN); @@ -144,22 +131,14 @@ static int aio_thread(void *arg) "errno = %d\n", errno); } else { - /* This is safe as we've just a pointer here. */ - aio = (struct aio_context *) (long) event.data; - if(update_aio(aio, event.res)){ - do_aio(ctx, aio); - continue; - } - reply = ((struct aio_thread_reply) - { .data = aio, - .err = aio->len }); - err = os_write_file(aio->reply_fd, &reply, - sizeof(reply)); + { .data = (void *) (long) event.data, + .err = event.res }); + reply_fd = ((struct aio_context *) reply.data)->reply_fd; + err = os_write_file(reply_fd, &reply, sizeof(reply)); if(err != sizeof(reply)) - printk("aio_thread - write failed, " - "fd = %d, err = %d\n", aio->reply_fd, - -err); + printk("aio_thread - write failed, fd = %d, " + "err = %d\n", aio_req_fd_r, -err); } } return 0; @@ -167,35 +146,35 @@ static int aio_thread(void *arg) #endif -static int do_not_aio(struct aio_context *aio) +static int do_not_aio(struct aio_thread_req *req) { char c; int err; - switch(aio->type){ + switch(req->type){ case AIO_READ: - err = os_seek_file(aio->fd, aio->offset); + err = os_seek_file(req->io_fd, req->offset); if(err) goto out; - err = os_read_file(aio->fd, aio->data, aio->len); + err = os_read_file(req->io_fd, req->buf, req->len); break; case AIO_WRITE: - err = os_seek_file(aio->fd, aio->offset); + err = os_seek_file(req->io_fd, req->offset); if(err) goto out; - err = os_write_file(aio->fd, aio->data, aio->len); + err = os_write_file(req->io_fd, req->buf, req->len); break; case AIO_MMAP: - err = os_seek_file(aio->fd, aio->offset); + err = os_seek_file(req->io_fd, req->offset); if(err) goto out; - err = os_read_file(aio->fd, &c, sizeof(c)); + err = os_read_file(req->io_fd, &c, sizeof(c)); break; default: - printk("do_not_aio - bad request type : %d\n", aio->type); + printk("do_not_aio - bad request type : %d\n", req->type); err = -EINVAL; break; } @@ -206,14 +185,14 @@ static int do_not_aio(struct aio_context *aio) static int not_aio_thread(void *arg) { - struct aio_context *aio; + struct aio_thread_req req; struct aio_thread_reply reply; int err; signal(SIGWINCH, SIG_IGN); while(1){ - err = os_read_file(aio_req_fd_r, &aio, sizeof(aio)); - if(err != sizeof(aio)){ + err = os_read_file(aio_req_fd_r, &req, sizeof(req)); + if(err != sizeof(req)){ if(err < 0) printk("not_aio_thread - read failed, " "fd = %d, err = %d\n", aio_req_fd_r, @@ -224,34 +203,17 @@ static int not_aio_thread(void *arg) } continue; } - again: - err = do_not_aio(aio); - - if(update_aio(aio, err)) - goto again; - - reply = ((struct aio_thread_reply) { .data = aio, - .err = aio->len }); - err = os_write_file(aio->reply_fd, &reply, sizeof(reply)); + err = do_not_aio(&req); + reply = ((struct aio_thread_reply) { .data = req.aio, + .err = err }); + err = os_write_file(req.aio->reply_fd, &reply, sizeof(reply)); if(err != sizeof(reply)) printk("not_aio_thread - write failed, fd = %d, " "err = %d\n", aio_req_fd_r, -err); } } -static int submit_aio_24(struct aio_context *aio) -{ - int err; - - err = os_write_file(aio_req_fd_w, &aio, sizeof(aio)); - if(err == sizeof(aio)) - err = 0; - - return err; -} - static int aio_pid = -1; -static int (*submit_proc)(struct aio_context *aio); static int init_aio_24(void) { @@ -283,33 +245,11 @@ static int init_aio_24(void) #endif printk("2.6 host AIO support not used - falling back to I/O " "thread\n"); - - submit_proc = submit_aio_24; - return 0; } #ifdef HAVE_AIO_ABI #define DEFAULT_24_AIO 0 -static int submit_aio_26(struct aio_context *aio) -{ - struct aio_thread_reply reply; - int err; - - err = do_aio(ctx, aio); - if(err){ - reply = ((struct aio_thread_reply) { .data = aio, - .err = err }); - err = os_write_file(aio->reply_fd, &reply, sizeof(reply)); - if(err != sizeof(reply)) - printk("submit_aio_26 - write failed, " - "fd = %d, err = %d\n", aio->reply_fd, -err); - else err = 0; - } - - return err; -} - static int init_aio_26(void) { unsigned long stack; @@ -330,22 +270,39 @@ static int init_aio_26(void) aio_pid = err; printk("Using 2.6 host AIO\n"); - - submit_proc = submit_aio_26; - return 0; } +static int submit_aio_26(enum aio_type type, int io_fd, char *buf, int len, + unsigned long long offset, struct aio_context *aio) +{ + struct aio_thread_reply reply; + int err; + + err = do_aio(ctx, type, io_fd, buf, len, offset, aio); + if(err){ + reply = ((struct aio_thread_reply) { .data = aio, + .err = err }); + err = os_write_file(aio->reply_fd, &reply, sizeof(reply)); + if(err != sizeof(reply)) + printk("submit_aio_26 - write failed, " + "fd = %d, err = %d\n", aio->reply_fd, -err); + else err = 0; + } + + return err; +} + #else #define DEFAULT_24_AIO 1 -static int submit_aio_26(struct aio_context *aio) +static int init_aio_26(void) { return -ENOSYS; } -static int init_aio_26(void) +static int submit_aio_26(enum aio_type type, int io_fd, char *buf, int len, + unsigned long long offset, struct aio_context *aio) { - submit_proc = submit_aio_26; return -ENOSYS; } #endif @@ -412,7 +369,33 @@ static void exit_aio(void) __uml_exitcall(exit_aio); -int submit_aio(struct aio_context *aio) +static int submit_aio_24(enum aio_type type, int io_fd, char *buf, int len, + unsigned long long offset, struct aio_context *aio) { - return (*submit_proc)(aio); + struct aio_thread_req req = { .type = type, + .io_fd = io_fd, + .offset = offset, + .buf = buf, + .len = len, + .aio = aio, + }; + int err; + + err = os_write_file(aio_req_fd_w, &req, sizeof(req)); + if(err == sizeof(req)) + err = 0; + + return err; +} + +int submit_aio(enum aio_type type, int io_fd, char *buf, int len, + unsigned long long offset, int reply_fd, + struct aio_context *aio) +{ + aio->reply_fd = reply_fd; + if(aio_24) + return submit_aio_24(type, io_fd, buf, len, offset, aio); + else { + return submit_aio_26(type, io_fd, buf, len, offset, aio); + } } From 9d624ea474a3ddf3a0702d9b47e428ce1f8488a6 Mon Sep 17 00:00:00 2001 From: Paolo 'Blaisorblade' Giarrusso Date: Tue, 11 Oct 2005 21:01:01 +0200 Subject: [PATCH 36/59] [PATCH] uml: compile-time fix recent patch Give an empty definition for clear_can_do_skas() when it is not needed. Thanks to Junichi Uekawa for reporting the breakage and providing a fix (I re-fixed it in an IMHO cleaner way). Signed-off-by: Paolo 'Blaisorblade' Giarrusso Signed-off-by: Linus Torvalds --- arch/um/include/os.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/um/include/os.h b/arch/um/include/os.h index 6f766e1faecc..2e58e304b8be 100644 --- a/arch/um/include/os.h +++ b/arch/um/include/os.h @@ -6,6 +6,7 @@ #ifndef __OS_H__ #define __OS_H__ +#include "uml-config.h" #include "asm/types.h" #include "../os/include/file.h" @@ -159,7 +160,11 @@ extern int can_do_skas(void); /* Make sure they are clear when running in TT mode. Required by * SEGV_MAYBE_FIXABLE */ +#ifdef UML_CONFIG_MODE_SKAS #define clear_can_do_skas() do { ptrace_faultinfo = proc_mm = 0; } while (0) +#else +#define clear_can_do_skas() do {} while (0) +#endif /* mem.c */ extern int create_mem_file(unsigned long len); From cbd27b8ced4b1888c93f69b4dd108a69ac4d733f Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Wed, 12 Oct 2005 11:39:33 +1000 Subject: [PATCH 37/59] [PATCH] ppc32: Fix timekeeping Interestingly enough, ppc32 had broken timekeeping for ages... It worked, but probably drifted a bit more than could be explained by the actual bad precision of the timebase calibration. We discovered that recently when somebody figured out that the common code was using CLOCK_TICK_RATE to correct the timekeeing, and ppc32 had a completely bogus value for it. This patch turns it into something saner. Probably not as good as doing something based on the actual timebase frequency precision but I'll leave that sort of math to others. This at least makes it better for the common HZ values. Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Linus Torvalds --- include/asm-powerpc/timex.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/asm-powerpc/timex.h b/include/asm-powerpc/timex.h index 51c5b316be55..c02d15aced91 100644 --- a/include/asm-powerpc/timex.h +++ b/include/asm-powerpc/timex.h @@ -10,7 +10,7 @@ #include #include -#define CLOCK_TICK_RATE 1193180 /* Underlying HZ */ +#define CLOCK_TICK_RATE 1024000 /* Underlying HZ */ typedef unsigned long cycles_t; From d8e998c58a870770905495a1d45ebf7285b5b1c5 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Wed, 12 Oct 2005 14:22:50 +1000 Subject: [PATCH 38/59] [PATCH] ppc32: Tell userland about lack of standard TB Glibc is about to get some new high precision timer stuff that relies on the standard timebase of the PPC architecture. However, some (rare & old) CPUs do not have such timebase and it is a bit annoying to have your stuff just crash because you are running on the wrong CPU... This exposes to userland a CPU feature bit that tells that the current processor doesn't have a standard timebase. It's negative logic so that glibc will still "just work" on older kernels (it will just be unhappy on those old CPUs but that doesn't really matter as distro tend to update glibc & kernel at the same time). Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Linus Torvalds --- arch/ppc/kernel/cputable.c | 5 +++-- include/asm-ppc/cputable.h | 1 + 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/ppc/kernel/cputable.c b/arch/ppc/kernel/cputable.c index 546e1ea4cafa..6b76cf58d9e0 100644 --- a/arch/ppc/kernel/cputable.c +++ b/arch/ppc/kernel/cputable.c @@ -91,7 +91,7 @@ struct cpu_spec cpu_specs[] = { .cpu_features = CPU_FTR_COMMON | CPU_FTR_601 | CPU_FTR_HPTE_TABLE, .cpu_user_features = COMMON_PPC | PPC_FEATURE_601_INSTR | - PPC_FEATURE_UNIFIED_CACHE, + PPC_FEATURE_UNIFIED_CACHE | PPC_FEATURE_NO_TB, .icache_bsize = 32, .dcache_bsize = 32, .cpu_setup = __setup_cpu_601 @@ -745,7 +745,8 @@ struct cpu_spec cpu_specs[] = { .cpu_name = "403GCX", .cpu_features = CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB, - .cpu_user_features = PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU, + .cpu_user_features = PPC_FEATURE_32 | + PPC_FEATURE_HAS_MMU | PPC_FEATURE_NO_TB, .icache_bsize = 16, .dcache_bsize = 16, }, diff --git a/include/asm-ppc/cputable.h b/include/asm-ppc/cputable.h index 41d8f8425c04..e17c492c870b 100644 --- a/include/asm-ppc/cputable.h +++ b/include/asm-ppc/cputable.h @@ -24,6 +24,7 @@ #define PPC_FEATURE_HAS_SPE 0x00800000 #define PPC_FEATURE_HAS_EFP_SINGLE 0x00400000 #define PPC_FEATURE_HAS_EFP_DOUBLE 0x00200000 +#define PPC_FEATURE_NO_TB 0x00100000 #ifdef __KERNEL__ From 60ac133aac9e07b94f2cb6bf571bf8aef69248c3 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Wed, 12 Oct 2005 19:51:24 +0100 Subject: [PATCH 39/59] [ARM] 2974/1: fix ARM710 swi bug workaround Patch from Nicolas Pitre Either no one is using an ARM710 with recent kernels, or all ARM710s still in use are not afflicted by this swi bug. Nevertheless, the code to work around the ARM710 swi bug is itself currently buggy since it uses r8 as a pointer to S_PC while in fact it holds the spsr content these days. Fix that, and simplify the code as well. Signed-off-by: Nicolas Pitre Signed-off-by: Russell King --- arch/arm/kernel/entry-common.S | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S index 81d450ac3fab..066597f4345a 100644 --- a/arch/arm/kernel/entry-common.S +++ b/arch/arm/kernel/entry-common.S @@ -106,15 +106,10 @@ ENTRY(ret_from_fork) .endm .Larm700bug: - ldr r0, [sp, #S_PSR] @ Get calling cpsr - sub lr, lr, #4 - str lr, [r8] - msr spsr_cxsf, r0 ldmia sp, {r0 - lr}^ @ Get calling r0 - lr mov r0, r0 - ldr lr, [sp, #S_PC] @ Get PC add sp, sp, #S_FRAME_SIZE - movs pc, lr + subs pc, lr, #4 #else .macro arm710_bug_check, instr, temp .endm From 0eea3c0b6cb356bc8e515084f831cac7e3d5131c Mon Sep 17 00:00:00 2001 From: Ben Dooks Date: Wed, 12 Oct 2005 19:58:05 +0100 Subject: [PATCH 40/59] [ARM] 2975/1: S3C2410: time.c missing include of cpu.h Patch from Ben Dooks arch/arm/mach-s3c2410/time.c is missing include of cpu.h, causing the declaration of the timer struct (s3c24xx_timer) to be flagged as missing the declaration. Signed-off-by: Ben Dooks Signed-off-by: Russell King --- arch/arm/mach-s3c2410/time.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/mach-s3c2410/time.c b/arch/arm/mach-s3c2410/time.c index c0acfb2ad790..8a00e3c3cd08 100644 --- a/arch/arm/mach-s3c2410/time.c +++ b/arch/arm/mach-s3c2410/time.c @@ -38,6 +38,7 @@ #include #include "clock.h" +#include "cpu.h" static unsigned long timer_startval; static unsigned long timer_usec_ticks; From 17efa644f624a521e4a6c6a4641d39d227a9b24a Mon Sep 17 00:00:00 2001 From: Ben Dooks Date: Wed, 12 Oct 2005 19:58:06 +0100 Subject: [PATCH 41/59] [ARM] 2976/1: S3C2410: add static to functions in serial driver Patch from Ben Dooks The s3c2410 serial driver is missing static declerations on several functions that are not exported, and have no need of being exported outside the driver Signed-off-by: Ben Dooks Signed-off-by: Russell King --- drivers/serial/s3c2410.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/drivers/serial/s3c2410.c b/drivers/serial/s3c2410.c index eff2158024c8..52692aa345ec 100644 --- a/drivers/serial/s3c2410.c +++ b/drivers/serial/s3c2410.c @@ -1092,8 +1092,8 @@ static int s3c24xx_serial_init_port(struct s3c24xx_uart_port *ourport, static int probe_index = 0; -int s3c24xx_serial_probe(struct device *_dev, - struct s3c24xx_uart_info *info) +static int s3c24xx_serial_probe(struct device *_dev, + struct s3c24xx_uart_info *info) { struct s3c24xx_uart_port *ourport; struct platform_device *dev = to_platform_device(_dev); @@ -1120,7 +1120,7 @@ int s3c24xx_serial_probe(struct device *_dev, return ret; } -int s3c24xx_serial_remove(struct device *_dev) +static int s3c24xx_serial_remove(struct device *_dev) { struct uart_port *port = s3c24xx_dev_to_port(_dev); @@ -1134,7 +1134,8 @@ int s3c24xx_serial_remove(struct device *_dev) #ifdef CONFIG_PM -int s3c24xx_serial_suspend(struct device *dev, pm_message_t state, u32 level) +static int s3c24xx_serial_suspend(struct device *dev, pm_message_t state, + u32 level) { struct uart_port *port = s3c24xx_dev_to_port(dev); @@ -1144,7 +1145,7 @@ int s3c24xx_serial_suspend(struct device *dev, pm_message_t state, u32 level) return 0; } -int s3c24xx_serial_resume(struct device *dev, u32 level) +static int s3c24xx_serial_resume(struct device *dev, u32 level) { struct uart_port *port = s3c24xx_dev_to_port(dev); struct s3c24xx_uart_port *ourport = to_ourport(port); @@ -1165,8 +1166,8 @@ int s3c24xx_serial_resume(struct device *dev, u32 level) #define s3c24xx_serial_resume NULL #endif -int s3c24xx_serial_init(struct device_driver *drv, - struct s3c24xx_uart_info *info) +static int s3c24xx_serial_init(struct device_driver *drv, + struct s3c24xx_uart_info *info) { dbg("s3c24xx_serial_init(%p,%p)\n", drv, info); return driver_register(drv); From a7b1bbbc89194deba8cde02200f08b3840c9daa2 Mon Sep 17 00:00:00 2001 From: Ben Dooks Date: Wed, 12 Oct 2005 19:58:07 +0100 Subject: [PATCH 42/59] [ARM] 2977/1: armksyms.c - make items in export table static Patch from Ben Dooks The items in the export table do not need to be exported elsehwere, so quash the sparse warning by making the symbol for the table entry static. Signed-off-by: Ben Dooks Signed-off-by: Russell King --- arch/arm/kernel/armksyms.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/kernel/armksyms.c b/arch/arm/kernel/armksyms.c index 835d450797a1..7b17a87a3311 100644 --- a/arch/arm/kernel/armksyms.c +++ b/arch/arm/kernel/armksyms.c @@ -45,8 +45,8 @@ extern void fp_enter(void); #define EXPORT_SYMBOL_ALIAS(sym,orig) \ EXPORT_CRC_ALIAS(sym) \ - const struct kernel_symbol __ksymtab_##sym \ - __attribute__((section("__ksymtab"))) = \ + static const struct kernel_symbol __ksymtab_##sym \ + __attribute_used__ __attribute__((section("__ksymtab"))) = \ { (unsigned long)&orig, #sym }; /* From 9f693d7b149a74bac301ee47136359294cffed25 Mon Sep 17 00:00:00 2001 From: Ben Dooks Date: Wed, 12 Oct 2005 19:58:07 +0100 Subject: [PATCH 43/59] [ARM] 2979/2: S3C2410 - add static to non-exported machine items Patch from Ben Dooks Do not export items that are not needed by symbol name elsewhere Signed-off-by: Ben Dooks Signed-off-by: Russell King --- arch/arm/mach-s3c2410/mach-anubis.c | 2 +- arch/arm/mach-s3c2410/mach-bast.c | 4 ++-- arch/arm/mach-s3c2410/mach-vr1000.c | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm/mach-s3c2410/mach-anubis.c b/arch/arm/mach-s3c2410/mach-anubis.c index 7c05f27fe1d6..5ae80f4e3e67 100644 --- a/arch/arm/mach-s3c2410/mach-anubis.c +++ b/arch/arm/mach-s3c2410/mach-anubis.c @@ -125,7 +125,7 @@ static int external_map[] = { 2 }; static int chip0_map[] = { 0 }; static int chip1_map[] = { 1 }; -struct mtd_partition anubis_default_nand_part[] = { +static struct mtd_partition anubis_default_nand_part[] = { [0] = { .name = "Boot Agent", .size = SZ_16K, diff --git a/arch/arm/mach-s3c2410/mach-bast.c b/arch/arm/mach-s3c2410/mach-bast.c index ed1f07d7252f..8ca955984645 100644 --- a/arch/arm/mach-s3c2410/mach-bast.c +++ b/arch/arm/mach-s3c2410/mach-bast.c @@ -230,7 +230,7 @@ static int chip0_map[] = { 1 }; static int chip1_map[] = { 2 }; static int chip2_map[] = { 3 }; -struct mtd_partition bast_default_nand_part[] = { +static struct mtd_partition bast_default_nand_part[] = { [0] = { .name = "Boot Agent", .size = SZ_16K, @@ -340,7 +340,7 @@ static struct resource bast_dm9k_resource[] = { * better IO routines can be written and tested */ -struct dm9000_plat_data bast_dm9k_platdata = { +static struct dm9000_plat_data bast_dm9k_platdata = { .flags = DM9000_PLATF_16BITONLY }; diff --git a/arch/arm/mach-s3c2410/mach-vr1000.c b/arch/arm/mach-s3c2410/mach-vr1000.c index 663a7f98fc0b..46b259673c18 100644 --- a/arch/arm/mach-s3c2410/mach-vr1000.c +++ b/arch/arm/mach-s3c2410/mach-vr1000.c @@ -288,7 +288,7 @@ static struct resource vr1000_dm9k1_resource[] = { * better IO routines can be written and tested */ -struct dm9000_plat_data vr1000_dm9k_platdata = { +static struct dm9000_plat_data vr1000_dm9k_platdata = { .flags = DM9000_PLATF_16BITONLY, }; From ceca629e0b4858d6b8bff260dab2e947d31aca56 Mon Sep 17 00:00:00 2001 From: Sascha Hauer Date: Wed, 12 Oct 2005 19:58:08 +0100 Subject: [PATCH 44/59] [ARM] 2971/1: i.MX uart handle rts irq Patch from Sascha Hauer handle rts interrupt Signed-off-by: Giancarlo Formicuccia Signed-off-by: Sascha Hauer --- drivers/serial/imx.c | 39 +++++++++++++++++++++++++++++++++------ 1 file changed, 33 insertions(+), 6 deletions(-) diff --git a/drivers/serial/imx.c b/drivers/serial/imx.c index 53e0323d4b83..bdb4e454b8b0 100644 --- a/drivers/serial/imx.c +++ b/drivers/serial/imx.c @@ -73,7 +73,7 @@ struct imx_port { struct uart_port port; struct timer_list timer; unsigned int old_status; - int txirq,rxirq; + int txirq,rxirq,rtsirq; }; /* @@ -181,6 +181,22 @@ static void imx_start_tx(struct uart_port *port) imx_transmit_buffer(sport); } +static irqreturn_t imx_rtsint(int irq, void *dev_id, struct pt_regs *regs) +{ + struct imx_port *sport = (struct imx_port *)dev_id; + unsigned int val = USR1((u32)sport->port.membase)&USR1_RTSS; + unsigned long flags; + + spin_lock_irqsave(&sport->port.lock, flags); + + USR1((u32)sport->port.membase) = USR1_RTSD; + uart_handle_cts_change(&sport->port, !!val); + wake_up_interruptible(&sport->port.info->delta_msr_wait); + + spin_unlock_irqrestore(&sport->port.lock, flags); + return IRQ_HANDLED; +} + static irqreturn_t imx_txint(int irq, void *dev_id, struct pt_regs *regs) { struct imx_port *sport = (struct imx_port *)dev_id; @@ -386,15 +402,21 @@ static int imx_startup(struct uart_port *port) if (retval) goto error_out1; retval = request_irq(sport->txirq, imx_txint, 0, - "imx-uart", sport); + DRIVER_NAME, sport); if (retval) goto error_out2; + retval = request_irq(sport->rtsirq, imx_rtsint, 0, + DRIVER_NAME, sport); + if (retval) goto error_out3; + set_irq_type(sport->rtsirq, IRQT_BOTHEDGE); + /* * Finally, clear and enable interrupts */ + USR1((u32)sport->port.membase) = USR1_RTSD; UCR1((u32)sport->port.membase) |= - (UCR1_TXMPTYEN | UCR1_RRDYEN | UCR1_UARTEN); + (UCR1_TXMPTYEN | UCR1_RRDYEN | UCR1_RTSDEN | UCR1_UARTEN); UCR2((u32)sport->port.membase) |= (UCR2_RXEN | UCR2_TXEN); /* @@ -406,6 +428,8 @@ static int imx_startup(struct uart_port *port) return 0; +error_out3: + free_irq(sport->txirq, sport); error_out2: free_irq(sport->rxirq, sport); error_out1: @@ -424,6 +448,7 @@ static void imx_shutdown(struct uart_port *port) /* * Free the interrupts */ + free_irq(sport->rtsirq, sport); free_irq(sport->txirq, sport); free_irq(sport->rxirq, sport); @@ -432,7 +457,7 @@ static void imx_shutdown(struct uart_port *port) */ UCR1((u32)sport->port.membase) &= - ~(UCR1_TXMPTYEN | UCR1_RRDYEN | UCR1_UARTEN); + ~(UCR1_TXMPTYEN | UCR1_RRDYEN | UCR1_RTSDEN | UCR1_UARTEN); } static void @@ -522,7 +547,7 @@ imx_set_termios(struct uart_port *port, struct termios *termios, * disable interrupts and drain transmitter */ old_ucr1 = UCR1((u32)sport->port.membase); - UCR1((u32)sport->port.membase) &= ~(UCR1_TXMPTYEN | UCR1_RRDYEN); + UCR1((u32)sport->port.membase) &= ~(UCR1_TXMPTYEN | UCR1_RRDYEN | UCR1_RTSDEN); while ( !(USR2((u32)sport->port.membase) & USR2_TXDC)) barrier(); @@ -643,6 +668,7 @@ static struct imx_port imx_ports[] = { { .txirq = UART1_MINT_TX, .rxirq = UART1_MINT_RX, + .rtsirq = UART1_MINT_RTS, .port = { .type = PORT_IMX, .iotype = SERIAL_IO_MEM, @@ -658,6 +684,7 @@ static struct imx_port imx_ports[] = { }, { .txirq = UART2_MINT_TX, .rxirq = UART2_MINT_RX, + .rtsirq = UART2_MINT_RTS, .port = { .type = PORT_IMX, .iotype = SERIAL_IO_MEM, @@ -737,7 +764,7 @@ imx_console_write(struct console *co, const char *s, unsigned int count) UCR1((u32)sport->port.membase) = (old_ucr1 | UCR1_UARTCLKEN | UCR1_UARTEN) - & ~(UCR1_TXMPTYEN | UCR1_RRDYEN); + & ~(UCR1_TXMPTYEN | UCR1_RRDYEN | UCR1_RTSDEN); UCR2((u32)sport->port.membase) = old_ucr2 | UCR2_TXEN; /* From cd26f45bfca4d4fa5ddfe21613d2da46f1acb821 Mon Sep 17 00:00:00 2001 From: "George G. Davis" Date: Wed, 12 Oct 2005 19:58:09 +0100 Subject: [PATCH 45/59] [ARM] 2970/1: Use -mtune=arm1136j-s when building for CPU_V6 targets Patch from George G. Davis When building for CPU_V6 targets, we should use -mtune=arm1136j-s rather than -mtune=strongarm but fall back to the later in case someone is using an older toolchain (although they should really upgrade instead). Signed-off-by: George G. Davis Signed-off-by: Russell King --- arch/arm/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/Makefile b/arch/arm/Makefile index 7779f2d1acad..299bc0468702 100644 --- a/arch/arm/Makefile +++ b/arch/arm/Makefile @@ -53,7 +53,7 @@ tune-$(CONFIG_CPU_ARM926T) :=-mtune=arm9tdmi tune-$(CONFIG_CPU_SA110) :=-mtune=strongarm110 tune-$(CONFIG_CPU_SA1100) :=-mtune=strongarm1100 tune-$(CONFIG_CPU_XSCALE) :=$(call cc-option,-mtune=xscale,-mtune=strongarm110) -Wa,-mcpu=xscale -tune-$(CONFIG_CPU_V6) :=-mtune=strongarm +tune-$(CONFIG_CPU_V6) :=$(call cc-option,-mtune=arm1136j-s,-mtune=strongarm) # Need -Uarm for gcc < 3.x CFLAGS_ABI :=$(call cc-option,-mapcs-32,-mabi=apcs-gnu) $(call cc-option,-mno-thumb-interwork,) From 737d0bb7701cdebb661e4db0236071a7df977777 Mon Sep 17 00:00:00 2001 From: "George G. Davis" Date: Wed, 12 Oct 2005 19:58:10 +0100 Subject: [PATCH 46/59] [ARM] 2969/1: miscellaneous whitespace cleanup Patch from George G. Davis Fix leading, trailing and other miscellaneous whitespace issues in arch/arm/kernel/alignment.c. Signed-off-by: George G. Davis Signed-off-by: Russell King --- arch/arm/mm/alignment.c | 44 ++++++++++++++++++++--------------------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/arch/arm/mm/alignment.c b/arch/arm/mm/alignment.c index f35e69e9c65c..705c98921c37 100644 --- a/arch/arm/mm/alignment.c +++ b/arch/arm/mm/alignment.c @@ -111,7 +111,7 @@ proc_alignment_read(char *page, char **start, off_t off, int count, int *eof, } static int proc_alignment_write(struct file *file, const char __user *buffer, - unsigned long count, void *data) + unsigned long count, void *data) { char mode; @@ -119,7 +119,7 @@ static int proc_alignment_write(struct file *file, const char __user *buffer, if (get_user(mode, buffer)) return -EFAULT; if (mode >= '0' && mode <= '5') - ai_usermode = mode - '0'; + ai_usermode = mode - '0'; } return count; } @@ -262,7 +262,7 @@ union offset_union { goto fault; \ } while (0) -#define put32_unaligned_check(val,addr) \ +#define put32_unaligned_check(val,addr) \ __put32_unaligned_check("strb", val, addr) #define put32t_unaligned_check(val,addr) \ @@ -306,19 +306,19 @@ do_alignment_ldrhstrh(unsigned long addr, unsigned long instr, struct pt_regs *r return TYPE_LDST; user: - if (LDST_L_BIT(instr)) { - unsigned long val; - get16t_unaligned_check(val, addr); + if (LDST_L_BIT(instr)) { + unsigned long val; + get16t_unaligned_check(val, addr); - /* signed half-word? */ - if (instr & 0x40) - val = (signed long)((signed short) val); + /* signed half-word? */ + if (instr & 0x40) + val = (signed long)((signed short) val); - regs->uregs[rd] = val; - } else - put16t_unaligned_check(regs->uregs[rd], addr); + regs->uregs[rd] = val; + } else + put16t_unaligned_check(regs->uregs[rd], addr); - return TYPE_LDST; + return TYPE_LDST; fault: return TYPE_FAULT; @@ -342,11 +342,11 @@ do_alignment_ldrdstrd(unsigned long addr, unsigned long instr, unsigned long val; get32_unaligned_check(val, addr); regs->uregs[rd] = val; - get32_unaligned_check(val, addr+4); - regs->uregs[rd+1] = val; + get32_unaligned_check(val, addr + 4); + regs->uregs[rd + 1] = val; } else { put32_unaligned_check(regs->uregs[rd], addr); - put32_unaligned_check(regs->uregs[rd+1], addr+4); + put32_unaligned_check(regs->uregs[rd + 1], addr + 4); } return TYPE_LDST; @@ -356,11 +356,11 @@ do_alignment_ldrdstrd(unsigned long addr, unsigned long instr, unsigned long val; get32t_unaligned_check(val, addr); regs->uregs[rd] = val; - get32t_unaligned_check(val, addr+4); - regs->uregs[rd+1] = val; + get32t_unaligned_check(val, addr + 4); + regs->uregs[rd + 1] = val; } else { put32t_unaligned_check(regs->uregs[rd], addr); - put32t_unaligned_check(regs->uregs[rd+1], addr+4); + put32t_unaligned_check(regs->uregs[rd + 1], addr + 4); } return TYPE_LDST; @@ -443,7 +443,7 @@ do_alignment_ldmstm(unsigned long addr, unsigned long instr, struct pt_regs *reg if (LDST_P_EQ_U(instr)) /* U = P */ eaddr += 4; - /* + /* * For alignment faults on the ARM922T/ARM920T the MMU makes * the FSR (and hence addr) equal to the updated base address * of the multiple access rather than the restored value. @@ -570,7 +570,7 @@ thumb2arm(u16 tinstr) /* 6.5.1 Format 3: */ case 0x4800 >> 11: /* 7.1.28 LDR(3) */ /* NOTE: This case is not technically possible. We're - * loading 32-bit memory data via PC relative + * loading 32-bit memory data via PC relative * addressing mode. So we can and should eliminate * this case. But I'll leave it here for now. */ @@ -642,7 +642,7 @@ do_alignment(unsigned long addr, unsigned int fsr, struct pt_regs *regs) if (fault) { type = TYPE_FAULT; - goto bad_or_fault; + goto bad_or_fault; } if (user_mode(regs)) From 6ec5e7f3656f0397b7e8b39a7dcc77937d187596 Mon Sep 17 00:00:00 2001 From: Ben Dooks Date: Wed, 12 Oct 2005 19:58:10 +0100 Subject: [PATCH 47/59] [ARM] 2978/1: nwfpe - clean up sparse errors Patch from Ben Dooks The NWFPE is producing a number of errors from sparse due to not defining a number of functions in the header files. Signed-off-by: Ben Dooks Signed-off-by: Russell King --- arch/arm/nwfpe/fpa11.c | 5 ----- arch/arm/nwfpe/fpa11.h | 20 ++++++++++++++++++++ arch/arm/nwfpe/fpa11_cprt.c | 3 +-- arch/arm/nwfpe/fpopcode.h | 6 ++++++ arch/arm/nwfpe/softfloat.h | 3 +++ 5 files changed, 30 insertions(+), 7 deletions(-) diff --git a/arch/arm/nwfpe/fpa11.c b/arch/arm/nwfpe/fpa11.c index 7690f731ee87..7b3d74d73c80 100644 --- a/arch/arm/nwfpe/fpa11.c +++ b/arch/arm/nwfpe/fpa11.c @@ -31,11 +31,6 @@ #include #include -/* forward declarations */ -unsigned int EmulateCPDO(const unsigned int); -unsigned int EmulateCPDT(const unsigned int); -unsigned int EmulateCPRT(const unsigned int); - /* Reset the FPA11 chip. Called to initialize and reset the emulator. */ static void resetFPA11(void) { diff --git a/arch/arm/nwfpe/fpa11.h b/arch/arm/nwfpe/fpa11.h index 93523ae4b7a1..9677ae8448e8 100644 --- a/arch/arm/nwfpe/fpa11.h +++ b/arch/arm/nwfpe/fpa11.h @@ -95,4 +95,24 @@ extern int8 SetRoundingMode(const unsigned int); extern int8 SetRoundingPrecision(const unsigned int); extern void nwfpe_init_fpa(union fp_state *fp); +extern unsigned int EmulateAll(unsigned int opcode); + +extern unsigned int EmulateCPDT(const unsigned int opcode); +extern unsigned int EmulateCPDO(const unsigned int opcode); +extern unsigned int EmulateCPRT(const unsigned int opcode); + +/* fpa11_cpdt.c */ +extern unsigned int PerformLDF(const unsigned int opcode); +extern unsigned int PerformSTF(const unsigned int opcode); +extern unsigned int PerformLFM(const unsigned int opcode); +extern unsigned int PerformSFM(const unsigned int opcode); + +/* single_cpdo.c */ + +extern unsigned int SingleCPDO(struct roundingData *roundData, + const unsigned int opcode, FPREG * rFd); +/* double_cpdo.c */ +extern unsigned int DoubleCPDO(struct roundingData *roundData, + const unsigned int opcode, FPREG * rFd); + #endif diff --git a/arch/arm/nwfpe/fpa11_cprt.c b/arch/arm/nwfpe/fpa11_cprt.c index adf8d3000540..7c67023655e4 100644 --- a/arch/arm/nwfpe/fpa11_cprt.c +++ b/arch/arm/nwfpe/fpa11_cprt.c @@ -26,12 +26,11 @@ #include "fpa11.inl" #include "fpmodule.h" #include "fpmodule.inl" +#include "softfloat.h" #ifdef CONFIG_FPE_NWFPE_XP extern flag floatx80_is_nan(floatx80); #endif -extern flag float64_is_nan(float64); -extern flag float32_is_nan(float32); unsigned int PerformFLT(const unsigned int opcode); unsigned int PerformFIX(const unsigned int opcode); diff --git a/arch/arm/nwfpe/fpopcode.h b/arch/arm/nwfpe/fpopcode.h index 1777e92a88e6..6528e081c83f 100644 --- a/arch/arm/nwfpe/fpopcode.h +++ b/arch/arm/nwfpe/fpopcode.h @@ -476,4 +476,10 @@ static inline unsigned int getDestinationSize(const unsigned int opcode) return (nRc); } +extern unsigned int checkCondition(const unsigned int opcode, + const unsigned int ccodes); + +extern const float64 float64Constant[]; +extern const float32 float32Constant[]; + #endif diff --git a/arch/arm/nwfpe/softfloat.h b/arch/arm/nwfpe/softfloat.h index 1c8799b9ee4d..14151700b6b2 100644 --- a/arch/arm/nwfpe/softfloat.h +++ b/arch/arm/nwfpe/softfloat.h @@ -265,4 +265,7 @@ static inline flag float64_lt_nocheck(float64 a, float64 b) return (a != b) && (aSign ^ (a < b)); } +extern flag float32_is_nan( float32 a ); +extern flag float64_is_nan( float64 a ); + #endif From e6158b4a5647624ceb90074bfcc248ea3152c906 Mon Sep 17 00:00:00 2001 From: Lothar Wassmann Date: Wed, 12 Oct 2005 19:58:11 +0100 Subject: [PATCH 48/59] [ARM] 3002/1: Wrong parameter to uart_update_timeout() in drivers/serial/pxa.c Patch from Lothar Wassmann The function serial_pxa_set_termios() is calling uart_update_timeout() with the baud rate divisor as third parameter, while uart_update_timeout() expects the baud rate in this place. This results in a bogus port->timeout which is proportional to the baud rate. Signed-off-by: Lothar Wassmann Signed-off-by: Russell King --- drivers/serial/pxa.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/serial/pxa.c b/drivers/serial/pxa.c index 672b359b07ce..90c2a86c421b 100644 --- a/drivers/serial/pxa.c +++ b/drivers/serial/pxa.c @@ -499,7 +499,7 @@ serial_pxa_set_termios(struct uart_port *port, struct termios *termios, /* * Update the per-port timeout. */ - uart_update_timeout(port, termios->c_cflag, quot); + uart_update_timeout(port, termios->c_cflag, baud); up->port.read_status_mask = UART_LSR_OE | UART_LSR_THRE | UART_LSR_DR; if (termios->c_iflag & INPCK) From a451e28c7642830d8b066e5a13de46934151ce3a Mon Sep 17 00:00:00 2001 From: Liam Girdwood Date: Wed, 12 Oct 2005 19:58:12 +0100 Subject: [PATCH 49/59] [ARM] 3003/1: SSP channel map register updates for pxa2xx Patch from Liam Girdwood This patch updates the pxa2xx channel map registers definitions in pxa-regs.h Changes:- o Added description for SSP2 registers o Added definitions for SSP3 registers Signed-off-by:Liam Girdwood Signed-off-by: Russell King --- include/asm-arm/arch-pxa/pxa-regs.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/include/asm-arm/arch-pxa/pxa-regs.h b/include/asm-arm/arch-pxa/pxa-regs.h index 939d9e5020a0..13fa2deb4ddd 100644 --- a/include/asm-arm/arch-pxa/pxa-regs.h +++ b/include/asm-arm/arch-pxa/pxa-regs.h @@ -126,8 +126,8 @@ #define DRCMR12 __REG(0x40000130) /* Request to Channel Map Register for AC97 audio transmit Request */ #define DRCMR13 __REG(0x40000134) /* Request to Channel Map Register for SSP receive Request */ #define DRCMR14 __REG(0x40000138) /* Request to Channel Map Register for SSP transmit Request */ -#define DRCMR15 __REG(0x4000013c) /* Reserved */ -#define DRCMR16 __REG(0x40000140) /* Reserved */ +#define DRCMR15 __REG(0x4000013c) /* Request to Channel Map Register for SSP2 receive Request */ +#define DRCMR16 __REG(0x40000140) /* Request to Channel Map Register for SSP2 transmit Request */ #define DRCMR17 __REG(0x40000144) /* Request to Channel Map Register for ICP receive Request */ #define DRCMR18 __REG(0x40000148) /* Request to Channel Map Register for ICP transmit Request */ #define DRCMR19 __REG(0x4000014c) /* Request to Channel Map Register for STUART receive Request */ @@ -151,7 +151,8 @@ #define DRCMR37 __REG(0x40000194) /* Request to Channel Map Register for USB endpoint 13 Request */ #define DRCMR38 __REG(0x40000198) /* Request to Channel Map Register for USB endpoint 14 Request */ #define DRCMR39 __REG(0x4000019C) /* Reserved */ - +#define DRCMR66 __REG(0x40001108) /* Request to Channel Map Register for SSP3 receive Request */ +#define DRCMR67 __REG(0x4000110C) /* Request to Channel Map Register for SSP3 transmit Request */ #define DRCMR68 __REG(0x40001110) /* Request to Channel Map Register for Camera FIFO 0 Request */ #define DRCMR69 __REG(0x40001114) /* Request to Channel Map Register for Camera FIFO 1 Request */ #define DRCMR70 __REG(0x40001118) /* Request to Channel Map Register for Camera FIFO 2 Request */ From c9c10830740df1b5e7848d6fbb68c93a73e8f7cd Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 12 Oct 2005 12:22:46 -0700 Subject: [PATCH 50/59] [SPARC64]: Fix boot failures on SunBlade-150 The sequence to move over to the Linux trap tables from the firmware ones needs to be more air tight. It turns out that to be %100 safe we do need to be able to translate OBP mappings in our TLB miss handlers early. In order not to eat up a lot of kernel image memory with static page tables, just use the translations array in the OBP TLB miss handlers. That solves the bulk of the problem. Furthermore, to make sure the OBP TLB miss path will work even before the fixed MMU globals are loaded, explicitly load %g1 to TLB_SFSR at the beginning of the i-TLB and d-TLB miss handlers. To ease the OBP TLB miss walking of the prom_trans[] array, we sort it then delete all of the non-OBP entries in there (for example, there are entries for the kernel image itself which we're not interested in at all). We also save about 32K of kernel image size with this change. Not a bad side effect :-) There are still some reasons why trampoline.S can't use the setup_trap_table() yet. The most noteworthy are: 1) OBP boots secondary processors with non-bias'd stack for some reason. This is easily fixed by using a small bootup stack in the kernel image explicitly for this purpose. 2) Doing a firmware call via the normal C call prom_set_trap_table() goes through the whole OBP enter/exit sequence that saves and restores OBP and Linux kernel state in the MMUs. This path unfortunately does a "flush %g6" while loading up the OBP locked TLB entries for the firmware call. If we setup the %g6 in the trampoline.S code properly, that is in the PAGE_OFFSET linear mapping, but we're not on the kernel trap table yet so those addresses won't translate properly. One idea is to do a by-hand firmware call like we do in the early bootup code and elsewhere here in trampoline.S But this fails as well, as aparently the secondary processors are not booted with OBP's special locked TLB entries loaded. These are necessary for the firwmare to processes TLB misses correctly up until the point where we take over the trap table. This does need to be resolved at some point. Signed-off-by: David S. Miller --- arch/sparc64/kernel/dtlb_base.S | 14 +-- arch/sparc64/kernel/dtlb_prot.S | 12 +-- arch/sparc64/kernel/head.S | 61 +++++------ arch/sparc64/kernel/itlb_base.S | 26 ++--- arch/sparc64/kernel/ktlb.S | 84 +++++++-------- arch/sparc64/mm/init.c | 185 +++++++++++++------------------- 6 files changed, 161 insertions(+), 221 deletions(-) diff --git a/arch/sparc64/kernel/dtlb_base.S b/arch/sparc64/kernel/dtlb_base.S index 702d349c1e88..6528786840c0 100644 --- a/arch/sparc64/kernel/dtlb_base.S +++ b/arch/sparc64/kernel/dtlb_base.S @@ -53,19 +53,18 @@ * be guaranteed to be 0 ... mmu_context.h does guarantee this * by only using 10 bits in the hwcontext value. */ -#define CREATE_VPTE_OFFSET1(r1, r2) +#define CREATE_VPTE_OFFSET1(r1, r2) nop #define CREATE_VPTE_OFFSET2(r1, r2) \ srax r1, 10, r2 -#define CREATE_VPTE_NOP nop #else #define CREATE_VPTE_OFFSET1(r1, r2) \ srax r1, PAGE_SHIFT, r2 #define CREATE_VPTE_OFFSET2(r1, r2) \ sllx r2, 3, r2 -#define CREATE_VPTE_NOP #endif /* DTLB ** ICACHE line 1: Quick user TLB misses */ + mov TLB_SFSR, %g1 ldxa [%g1 + %g1] ASI_DMMU, %g4 ! Get TAG_ACCESS andcc %g4, TAG_CONTEXT_BITS, %g0 ! From Nucleus? from_tl1_trap: @@ -74,18 +73,16 @@ from_tl1_trap: be,pn %xcc, kvmap ! Yep, special processing CREATE_VPTE_OFFSET2(%g4, %g6) ! Create VPTE offset cmp %g5, 4 ! Last trap level? - be,pn %xcc, longpath ! Yep, cannot risk VPTE miss - nop ! delay slot /* DTLB ** ICACHE line 2: User finish + quick kernel TLB misses */ + be,pn %xcc, longpath ! Yep, cannot risk VPTE miss + nop ! delay slot ldxa [%g3 + %g6] ASI_S, %g5 ! Load VPTE 1: brgez,pn %g5, longpath ! Invalid, branch out nop ! Delay-slot 9: stxa %g5, [%g0] ASI_DTLB_DATA_IN ! Reload TLB retry ! Trap return nop - nop - nop /* DTLB ** ICACHE line 3: winfixups+real_faults */ longpath: @@ -106,8 +103,7 @@ longpath: nop nop nop - CREATE_VPTE_NOP + nop #undef CREATE_VPTE_OFFSET1 #undef CREATE_VPTE_OFFSET2 -#undef CREATE_VPTE_NOP diff --git a/arch/sparc64/kernel/dtlb_prot.S b/arch/sparc64/kernel/dtlb_prot.S index d848bb7374bb..e0a920162604 100644 --- a/arch/sparc64/kernel/dtlb_prot.S +++ b/arch/sparc64/kernel/dtlb_prot.S @@ -14,14 +14,14 @@ */ /* PROT ** ICACHE line 1: User DTLB protection trap */ - stxa %g0, [%g1] ASI_DMMU ! Clear SFSR FaultValid bit - membar #Sync ! Synchronize ASI stores - rdpr %pstate, %g5 ! Move into alternate globals + mov TLB_SFSR, %g1 + stxa %g0, [%g1] ASI_DMMU ! Clear FaultValid bit + membar #Sync ! Synchronize stores + rdpr %pstate, %g5 ! Move into alt-globals wrpr %g5, PSTATE_AG|PSTATE_MG, %pstate - rdpr %tl, %g1 ! Need to do a winfixup? + rdpr %tl, %g1 ! Need a winfixup? cmp %g1, 1 ! Trap level >1? - mov TLB_TAG_ACCESS, %g4 ! Prepare reload of vaddr - nop + mov TLB_TAG_ACCESS, %g4 ! For reload of vaddr /* PROT ** ICACHE line 2: More real fault processing */ bgu,pn %xcc, winfix_trampoline ! Yes, perform winfixup diff --git a/arch/sparc64/kernel/head.S b/arch/sparc64/kernel/head.S index 4c942f71184d..b49dcd4504b0 100644 --- a/arch/sparc64/kernel/head.S +++ b/arch/sparc64/kernel/head.S @@ -28,19 +28,14 @@ #include /* This section from from _start to sparc64_boot_end should fit into - * 0x0000.0000.0040.4000 to 0x0000.0000.0040.8000 and will be sharing space - * with bootup_user_stack, which is from 0x0000.0000.0040.4000 to - * 0x0000.0000.0040.6000 and empty_bad_page, which is from - * 0x0000.0000.0040.6000 to 0x0000.0000.0040.8000. + * 0x0000000000404000 to 0x0000000000408000. */ - .text .globl start, _start, stext, _stext _start: start: _stext: stext: -bootup_user_stack: ! 0x0000000000404000 b sparc64_boot flushw /* Flush register file. */ @@ -392,31 +387,30 @@ tlb_fixup_done: * former does use this code, the latter does not yet due * to some complexities. That should be fixed up at some * point. + * + * There used to be enormous complexity wrt. transferring + * over from the firwmare's trap table to the Linux kernel's. + * For example, there was a chicken & egg problem wrt. building + * the OBP page tables, yet needing to be on the Linux kernel + * trap table (to translate PAGE_OFFSET addresses) in order to + * do that. + * + * We now handle OBP tlb misses differently, via linear lookups + * into the prom_trans[] array. So that specific problem no + * longer exists. Yet, unfortunately there are still some issues + * preventing trampoline.S from using this code... ho hum. */ .globl setup_trap_table setup_trap_table: save %sp, -192, %sp - /* Force interrupts to be disabled. Transferring over to - * the Linux trap table is a very delicate operation. - * Until we are actually on the Linux trap table, we cannot - * get the PAGE_OFFSET linear mappings translated. We need - * that mapping to be setup in order to initialize the firmware - * page tables. - * - * So there is this window of time, from the return from - * prom_set_trap_table() until inherit_prom_mappings_post() - * (in arch/sparc64/mm/init.c) completes, during which no - * firmware address space accesses can be made. - */ + /* Force interrupts to be disabled. */ rdpr %pstate, %o1 andn %o1, PSTATE_IE, %o1 wrpr %o1, 0x0, %pstate wrpr %g0, 15, %pil - /* Ok, now make the final valid firmware call to jump over - * to the Linux trap table. - */ + /* Make the firmware call to jump over to the Linux trap table. */ call prom_set_trap_table sethi %hi(sparc64_ttable_tl0), %o0 @@ -540,15 +534,21 @@ setup_tba: /* i0 = is_starfire */ ret restore +sparc64_boot_end: + +#include "systbls.S" +#include "ktlb.S" +#include "etrap.S" +#include "rtrap.S" +#include "winfixup.S" +#include "entry.S" /* - * The following skips make sure the trap table in ttable.S is aligned + * The following skip makes sure the trap table in ttable.S is aligned * on a 32K boundary as required by the v9 specs for TBA register. */ -sparc64_boot_end: - .skip 0x2000 + _start - sparc64_boot_end -bootup_user_stack_end: - .skip 0x2000 +1: + .skip 0x4000 + _start - 1b #ifdef CONFIG_SBUS /* This is just a hack to fool make depend config.h discovering @@ -560,15 +560,6 @@ bootup_user_stack_end: ! 0x0000000000408000 #include "ttable.S" -#include "systbls.S" -#include "ktlb.S" -#include "etrap.S" -#include "rtrap.S" -#include "winfixup.S" -#include "entry.S" - - /* This is just anal retentiveness on my part... */ - .align 16384 .data .align 8 diff --git a/arch/sparc64/kernel/itlb_base.S b/arch/sparc64/kernel/itlb_base.S index b5e32dfa4fbc..4951ff8f6877 100644 --- a/arch/sparc64/kernel/itlb_base.S +++ b/arch/sparc64/kernel/itlb_base.S @@ -15,14 +15,12 @@ */ #define CREATE_VPTE_OFFSET1(r1, r2) \ srax r1, 10, r2 -#define CREATE_VPTE_OFFSET2(r1, r2) -#define CREATE_VPTE_NOP nop +#define CREATE_VPTE_OFFSET2(r1, r2) nop #else /* PAGE_SHIFT */ #define CREATE_VPTE_OFFSET1(r1, r2) \ srax r1, PAGE_SHIFT, r2 #define CREATE_VPTE_OFFSET2(r1, r2) \ sllx r2, 3, r2 -#define CREATE_VPTE_NOP #endif /* PAGE_SHIFT */ @@ -36,6 +34,7 @@ */ /* ITLB ** ICACHE line 1: Quick user TLB misses */ + mov TLB_SFSR, %g1 ldxa [%g1 + %g1] ASI_IMMU, %g4 ! Get TAG_ACCESS CREATE_VPTE_OFFSET1(%g4, %g6) ! Create VPTE offset CREATE_VPTE_OFFSET2(%g4, %g6) ! Create VPTE offset @@ -43,41 +42,38 @@ 1: brgez,pn %g5, 3f ! Not valid, branch out sethi %hi(_PAGE_EXEC), %g4 ! Delay-slot andcc %g5, %g4, %g0 ! Executable? + +/* ITLB ** ICACHE line 2: Real faults */ be,pn %xcc, 3f ! Nope, branch. nop ! Delay-slot 2: stxa %g5, [%g0] ASI_ITLB_DATA_IN ! Load PTE into TLB retry ! Trap return -3: rdpr %pstate, %g4 ! Move into alternate globals - -/* ITLB ** ICACHE line 2: Real faults */ +3: rdpr %pstate, %g4 ! Move into alt-globals wrpr %g4, PSTATE_AG|PSTATE_MG, %pstate rdpr %tpc, %g5 ! And load faulting VA mov FAULT_CODE_ITLB, %g4 ! It was read from ITLB -sparc64_realfault_common: ! Called by TL0 dtlb_miss too + +/* ITLB ** ICACHE line 3: Finish faults */ +sparc64_realfault_common: ! Called by dtlb_miss stb %g4, [%g6 + TI_FAULT_CODE] stx %g5, [%g6 + TI_FAULT_ADDR] ba,pt %xcc, etrap ! Save state 1: rd %pc, %g7 ! ... - nop - -/* ITLB ** ICACHE line 3: Finish faults + window fixups */ call do_sparc64_fault ! Call fault handler add %sp, PTREGS_OFF, %o0! Compute pt_regs arg ba,pt %xcc, rtrap_clr_l6 ! Restore cpu state nop + +/* ITLB ** ICACHE line 4: Window fixups */ winfix_trampoline: rdpr %tpc, %g3 ! Prepare winfixup TNPC - or %g3, 0x7c, %g3 ! Compute offset to branch + or %g3, 0x7c, %g3 ! Compute branch offset wrpr %g3, %tnpc ! Write it into TNPC done ! Do it to it - -/* ITLB ** ICACHE line 4: Unused... */ nop nop nop nop - CREATE_VPTE_NOP #undef CREATE_VPTE_OFFSET1 #undef CREATE_VPTE_OFFSET2 -#undef CREATE_VPTE_NOP diff --git a/arch/sparc64/kernel/ktlb.S b/arch/sparc64/kernel/ktlb.S index 7796b37f478c..d9244d3c9f73 100644 --- a/arch/sparc64/kernel/ktlb.S +++ b/arch/sparc64/kernel/ktlb.S @@ -58,9 +58,6 @@ vpte_noent: done vpte_insn_obp: - sethi %hi(prom_pmd_phys), %g5 - ldx [%g5 + %lo(prom_pmd_phys)], %g5 - /* Behave as if we are at TL0. */ wrpr %g0, 1, %tl rdpr %tpc, %g4 /* Find original faulting iaddr */ @@ -71,58 +68,57 @@ vpte_insn_obp: mov TLB_SFSR, %g1 stxa %g4, [%g1 + %g1] ASI_IMMU - /* Get PMD offset. */ - srlx %g4, 23, %g6 - and %g6, 0x7ff, %g6 - sllx %g6, 2, %g6 + sethi %hi(prom_trans), %g5 + or %g5, %lo(prom_trans), %g5 - /* Load PMD, is it valid? */ - lduwa [%g5 + %g6] ASI_PHYS_USE_EC, %g5 - brz,pn %g5, longpath - sllx %g5, 11, %g5 +1: ldx [%g5 + 0x00], %g6 ! base + brz,a,pn %g6, longpath ! no more entries, fail + mov TLB_SFSR, %g1 ! and restore %g1 + ldx [%g5 + 0x08], %g1 ! len + add %g6, %g1, %g1 ! end + cmp %g6, %g4 + bgu,pt %xcc, 2f + cmp %g4, %g1 + bgeu,pt %xcc, 2f + ldx [%g5 + 0x10], %g1 ! PTE - /* Get PTE offset. */ - srlx %g4, 13, %g6 - and %g6, 0x3ff, %g6 - sllx %g6, 3, %g6 - - /* Load PTE. */ - ldxa [%g5 + %g6] ASI_PHYS_USE_EC, %g5 - brgez,pn %g5, longpath - nop - - /* TLB load and return from trap. */ + /* TLB load, restore %g1, and return from trap. */ + sub %g4, %g6, %g6 + add %g1, %g6, %g5 + mov TLB_SFSR, %g1 stxa %g5, [%g0] ASI_ITLB_DATA_IN retry +2: ba,pt %xcc, 1b + add %g5, (3 * 8), %g5 ! next entry + kvmap_do_obp: - sethi %hi(prom_pmd_phys), %g5 - ldx [%g5 + %lo(prom_pmd_phys)], %g5 + sethi %hi(prom_trans), %g5 + or %g5, %lo(prom_trans), %g5 + srlx %g4, 13, %g4 + sllx %g4, 13, %g4 - /* Get PMD offset. */ - srlx %g4, 23, %g6 - and %g6, 0x7ff, %g6 - sllx %g6, 2, %g6 +1: ldx [%g5 + 0x00], %g6 ! base + brz,a,pn %g6, longpath ! no more entries, fail + mov TLB_SFSR, %g1 ! and restore %g1 + ldx [%g5 + 0x08], %g1 ! len + add %g6, %g1, %g1 ! end + cmp %g6, %g4 + bgu,pt %xcc, 2f + cmp %g4, %g1 + bgeu,pt %xcc, 2f + ldx [%g5 + 0x10], %g1 ! PTE - /* Load PMD, is it valid? */ - lduwa [%g5 + %g6] ASI_PHYS_USE_EC, %g5 - brz,pn %g5, longpath - sllx %g5, 11, %g5 - - /* Get PTE offset. */ - srlx %g4, 13, %g6 - and %g6, 0x3ff, %g6 - sllx %g6, 3, %g6 - - /* Load PTE. */ - ldxa [%g5 + %g6] ASI_PHYS_USE_EC, %g5 - brgez,pn %g5, longpath - nop - - /* TLB load and return from trap. */ + /* TLB load, restore %g1, and return from trap. */ + sub %g4, %g6, %g6 + add %g1, %g6, %g5 + mov TLB_SFSR, %g1 stxa %g5, [%g0] ASI_DTLB_DATA_IN retry +2: ba,pt %xcc, 1b + add %g5, (3 * 8), %g5 ! next entry + /* * On a first level data miss, check whether this is to the OBP range (note * that such accesses can be made by prom, as well as by kernel using diff --git a/arch/sparc64/mm/init.c b/arch/sparc64/mm/init.c index 0d2e967c7200..1e44ee26cee8 100644 --- a/arch/sparc64/mm/init.c +++ b/arch/sparc64/mm/init.c @@ -105,7 +105,7 @@ static void __init read_obp_memory(const char *property, regs[i].phys_addr = base; regs[i].reg_size = size; } - sort(regs, ents, sizeof(struct linux_prom64_registers), + sort(regs, ents, sizeof(struct linux_prom64_registers), cmp_p64, NULL); } @@ -367,8 +367,11 @@ struct linux_prom_translation { unsigned long size; unsigned long data; }; -static struct linux_prom_translation prom_trans[512] __initdata; -static unsigned int prom_trans_ents __initdata; + +/* Exported for kernel TLB miss handling in ktlb.S */ +struct linux_prom_translation prom_trans[512] __read_mostly; +unsigned int prom_trans_ents __read_mostly; +unsigned int swapper_pgd_zero __read_mostly; extern unsigned long prom_boot_page; extern void prom_remap(unsigned long physpage, unsigned long virtpage, int mmu_ihandle); @@ -378,122 +381,57 @@ extern void register_prom_callbacks(void); /* Exported for SMP bootup purposes. */ unsigned long kern_locked_tte_data; -/* Exported for kernel TLB miss handling in ktlb.S */ -unsigned long prom_pmd_phys __read_mostly; -unsigned int swapper_pgd_zero __read_mostly; - -static pmd_t *prompmd __read_mostly; - -#define BASE_PAGE_SIZE 8192 - /* * Translate PROM's mapping we capture at boot time into physical address. * The second parameter is only set from prom_callback() invocations. */ unsigned long prom_virt_to_phys(unsigned long promva, int *error) { - pmd_t *pmdp = prompmd + ((promva >> 23) & 0x7ff); - pte_t *ptep; - unsigned long base; + int i; - if (pmd_none(*pmdp)) { - if (error) - *error = 1; - return 0; - } - ptep = (pte_t *)__pmd_page(*pmdp) + ((promva >> 13) & 0x3ff); - if (!pte_present(*ptep)) { - if (error) - *error = 1; - return 0; - } - if (error) { - *error = 0; - return pte_val(*ptep); - } - base = pte_val(*ptep) & _PAGE_PADDR; + for (i = 0; i < prom_trans_ents; i++) { + struct linux_prom_translation *p = &prom_trans[i]; - return base + (promva & (BASE_PAGE_SIZE - 1)); + if (promva >= p->virt && + promva < (p->virt + p->size)) { + unsigned long base = p->data & _PAGE_PADDR; + + if (error) + *error = 0; + return base + (promva & (8192 - 1)); + } + } + if (error) + *error = 1; + return 0UL; } /* The obp translations are saved based on 8k pagesize, since obp can * use a mixture of pagesizes. Misses to the LOW_OBP_ADDRESS -> - * HI_OBP_ADDRESS range are handled in entry.S and do not use the vpte + * HI_OBP_ADDRESS range are handled in ktlb.S and do not use the vpte * scheme (also, see rant in inherit_locked_prom_mappings()). */ -static void __init build_obp_range(unsigned long start, unsigned long end, unsigned long data) -{ - unsigned long vaddr; - - for (vaddr = start; vaddr < end; vaddr += BASE_PAGE_SIZE) { - unsigned long val; - pmd_t *pmd; - pte_t *pte; - - pmd = prompmd + ((vaddr >> 23) & 0x7ff); - if (pmd_none(*pmd)) { - pte = __alloc_bootmem(BASE_PAGE_SIZE, BASE_PAGE_SIZE, - PAGE_SIZE); - if (!pte) - prom_halt(); - memset(pte, 0, BASE_PAGE_SIZE); - pmd_set(pmd, pte); - } - pte = (pte_t *) __pmd_page(*pmd) + ((vaddr >> 13) & 0x3ff); - - val = data; - - /* Clear diag TTE bits. */ - if (tlb_type == spitfire) - val &= ~0x0003fe0000000000UL; - - set_pte_at(&init_mm, vaddr, pte, - __pte(val | _PAGE_MODIFIED)); - - data += BASE_PAGE_SIZE; - } -} - static inline int in_obp_range(unsigned long vaddr) { return (vaddr >= LOW_OBP_ADDRESS && vaddr < HI_OBP_ADDRESS); } -#define OBP_PMD_SIZE 2048 -static void __init build_obp_pgtable(void) +static int cmp_ptrans(const void *a, const void *b) { - unsigned long i; + const struct linux_prom_translation *x = a, *y = b; - prompmd = __alloc_bootmem(OBP_PMD_SIZE, OBP_PMD_SIZE, PAGE_SIZE); - if (!prompmd) - prom_halt(); - - memset(prompmd, 0, OBP_PMD_SIZE); - - prom_pmd_phys = __pa(prompmd); - - for (i = 0; i < prom_trans_ents; i++) { - unsigned long start, end; - - if (!in_obp_range(prom_trans[i].virt)) - continue; - - start = prom_trans[i].virt; - end = start + prom_trans[i].size; - if (end > HI_OBP_ADDRESS) - end = HI_OBP_ADDRESS; - - build_obp_range(start, end, prom_trans[i].data); - } + if (x->virt > y->virt) + return 1; + if (x->virt < y->virt) + return -1; + return 0; } -/* Read OBP translations property into 'prom_trans[]'. - * Return the number of entries. - */ +/* Read OBP translations property into 'prom_trans[]'. */ static void __init read_obp_translations(void) { - int n, node; + int n, node, ents, first, last, i; node = prom_finddevice("/virtual-memory"); n = prom_getproplen(node, "translations"); @@ -515,7 +453,41 @@ static void __init read_obp_translations(void) n = n / sizeof(struct linux_prom_translation); - prom_trans_ents = n; + ents = n; + + sort(prom_trans, ents, sizeof(struct linux_prom_translation), + cmp_ptrans, NULL); + + /* Now kick out all the non-OBP entries. */ + for (i = 0; i < ents; i++) { + if (in_obp_range(prom_trans[i].virt)) + break; + } + first = i; + for (; i < ents; i++) { + if (!in_obp_range(prom_trans[i].virt)) + break; + } + last = i; + + for (i = 0; i < (last - first); i++) { + struct linux_prom_translation *src = &prom_trans[i + first]; + struct linux_prom_translation *dest = &prom_trans[i]; + + *dest = *src; + } + for (; i < ents; i++) { + struct linux_prom_translation *dest = &prom_trans[i]; + dest->virt = dest->size = dest->data = 0x0UL; + } + + prom_trans_ents = last - first; + + if (tlb_type == spitfire) { + /* Clear diag TTE bits. */ + for (i = 0; i < prom_trans_ents; i++) + prom_trans[i].data &= ~0x0003fe0000000000UL; + } } static void __init remap_kernel(void) @@ -553,21 +525,18 @@ static void __init remap_kernel(void) } -static void __init inherit_prom_mappings_pre(void) +static void __init inherit_prom_mappings(void) { read_obp_translations(); /* Now fixup OBP's idea about where we really are mapped. */ prom_printf("Remapping the kernel... "); remap_kernel(); - prom_printf("done.\n"); -} -static void __init inherit_prom_mappings_post(void) -{ - build_obp_pgtable(); + prom_printf("Registering callbacks... "); register_prom_callbacks(); + prom_printf("done.\n"); } /* The OBP specifications for sun4u mark 0xfffffffc00000000 and @@ -1519,7 +1488,7 @@ void __init paging_init(void) swapper_pgd_zero = pgd_val(swapper_pg_dir[0]); - inherit_prom_mappings_pre(); + inherit_prom_mappings(); /* Ok, we can use our TLB miss and window trap handlers safely. * We need to do a quick peek here to see if we are on StarFire @@ -1530,23 +1499,15 @@ void __init paging_init(void) extern void setup_tba(int); setup_tba(this_is_starfire); } - __flush_tlb_all(); - /* Everything from this point forward, until we are done with - * inherit_prom_mappings_post(), must complete successfully - * without calling into the firmware. The firwmare page tables - * have not been built, but we are running on the Linux kernel's - * trap table. - */ + inherit_locked_prom_mappings(1); + + __flush_tlb_all(); /* Setup bootmem... */ pages_avail = 0; last_valid_pfn = end_pfn = bootmem_init(&pages_avail); - inherit_prom_mappings_post(); - - inherit_locked_prom_mappings(1); - #ifdef CONFIG_DEBUG_PAGEALLOC kernel_physical_mapping_init(); #endif From ab4060e858e36129f9319ef0fa055347ad60e1d5 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Wed, 12 Oct 2005 15:10:01 -0700 Subject: [PATCH 51/59] [BRIDGE]: fix race on bridge del if This fixes the RCU race on bridge delete interface. Basically, the network device has to be detached from the bridge in the first step (pre-RCU), rather than later. At that point, no more bridge traffic will come in, and the other code will not think that network device is part of a bridge. This should also fix the XEN test problems. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/bridge/br_if.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index 91bb895375f4..defcf6a8607c 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -79,7 +79,6 @@ static void destroy_nbp(struct net_bridge_port *p) { struct net_device *dev = p->dev; - dev->br_port = NULL; p->br = NULL; p->dev = NULL; dev_put(dev); @@ -100,6 +99,7 @@ static void del_nbp(struct net_bridge_port *p) struct net_bridge *br = p->br; struct net_device *dev = p->dev; + dev->br_port = NULL; dev_set_promiscuity(dev, -1); spin_lock_bh(&br->lock); From afb997c6163b33292d31a09d6aa5cbb03ffa5bf1 Mon Sep 17 00:00:00 2001 From: Ben Dooks Date: Wed, 12 Oct 2005 15:12:21 -0700 Subject: [PATCH 52/59] [NETPOLL]: wrong return for null netpoll_poll_lock() When netpoll is not being used, the macro that defines the removed routing netpoll_poll_lock defines the return as zero, but the real routine returns a `void *` Signed-off-by: Ben Dooks Signed-off-by: David S. Miller --- include/linux/netpoll.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/netpoll.h b/include/linux/netpoll.h index 5ade54a78dbb..ca5a8733000f 100644 --- a/include/linux/netpoll.h +++ b/include/linux/netpoll.h @@ -86,7 +86,7 @@ static inline void netpoll_poll_unlock(void *have) #else #define netpoll_rx(a) 0 -#define netpoll_poll_lock(a) 0 +#define netpoll_poll_lock(a) NULL #define netpoll_poll_unlock(a) #endif From 9ff5c59ce278c37bca22fbf98076d199bcaf9845 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 12 Oct 2005 15:59:39 -0700 Subject: [PATCH 53/59] [TCP]: Add code to help track down "BUG at net/ipv4/tcp_output.c:438!" This is the second report of this bug. Unfortunately the first reporter hasn't been able to reproduce it since to provide more debugging info. So let's apply this patch for 2.6.14 to 1) Make this non-fatal. 2) Provide the info we need to track it down. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/ipv4/tcp_output.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 8225e4257258..f37a50e55b68 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -435,7 +435,14 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned int mss int nsize, old_factor; u16 flags; - BUG_ON(len >= skb->len); + if (unlikely(len >= skb->len)) { + printk(KERN_DEBUG "TCP: seg_size=%u, mss=%u, seq=%u, " + "end_seq=%u, skb->len=%u.\n", len, mss_now, + TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq, + skb->len); + WARN_ON(1); + return 0; + } nsize = skb_headlen(skb) - len; if (nsize < 0) From 9153bd75f25ff3170f07fb9ac1fb0e718afc6fce Mon Sep 17 00:00:00 2001 From: Ben Dooks Date: Thu, 13 Oct 2005 16:46:35 +0100 Subject: [PATCH 54/59] [ARM] 3005/1: S3C2440 - add definition for s3c2440_set_dsc() call in hardware.h Patch from Ben Dooks include/asm-arm/arch-s3c2410/hardware.h was missing the definition for s3c2440_set_dsc() Signed-off-by: Ben Dooks Signed-off-by: Russell King --- include/asm-arm/arch-s3c2410/hardware.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/include/asm-arm/arch-s3c2410/hardware.h b/include/asm-arm/arch-s3c2410/hardware.h index 48a39918a760..1c9de29cafef 100644 --- a/include/asm-arm/arch-s3c2410/hardware.h +++ b/include/asm-arm/arch-s3c2410/hardware.h @@ -92,6 +92,13 @@ extern unsigned int s3c2410_gpio_getpin(unsigned int pin); extern unsigned int s3c2410_modify_misccr(unsigned int clr, unsigned int chg); +#ifdef CONFIG_CPU_S3C2440 + +extern int s3c2440_set_dsc(unsigned int pin, unsigned int value); + +#endif /* CONFIG_CPU_S3C2440 */ + + #endif /* __ASSEMBLY__ */ #include From 3a8f675c40ba2d04e4fff6710db3da763436269f Mon Sep 17 00:00:00 2001 From: Ben Dooks Date: Thu, 13 Oct 2005 16:46:35 +0100 Subject: [PATCH 55/59] [ARM] 3006/1: S3C2410 - arch/arm/mach-s3c2410 sparse fixes Patch from Ben Dooks Remove an unused variable from s3c2410.c and ensure that items not needed to be exported from s3c2440.c are declared static. Signed-off-by: Ben Dooks Signed-off-by: Russell King --- arch/arm/mach-s3c2410/s3c2410.c | 3 --- arch/arm/mach-s3c2410/s3c2440.c | 4 ++-- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/arch/arm/mach-s3c2410/s3c2410.c b/arch/arm/mach-s3c2410/s3c2410.c index 0b88993dfd27..a8bf5ec82602 100644 --- a/arch/arm/mach-s3c2410/s3c2410.c +++ b/arch/arm/mach-s3c2410/s3c2410.c @@ -125,9 +125,6 @@ static struct platform_device *uart_devices[] __initdata = { &s3c_uart2 }; -/* store our uart devices for the serial driver console */ -struct platform_device *s3c2410_uart_devices[3]; - static int s3c2410_uart_count = 0; /* uart registration process */ diff --git a/arch/arm/mach-s3c2410/s3c2440.c b/arch/arm/mach-s3c2410/s3c2440.c index d4c8281b55f6..833fa36bce05 100644 --- a/arch/arm/mach-s3c2410/s3c2440.c +++ b/arch/arm/mach-s3c2410/s3c2440.c @@ -151,7 +151,7 @@ void __init s3c2440_init_uarts(struct s3c2410_uartcfg *cfg, int no) #ifdef CONFIG_PM -struct sleep_save s3c2440_sleep[] = { +static struct sleep_save s3c2440_sleep[] = { SAVE_ITEM(S3C2440_DSC0), SAVE_ITEM(S3C2440_DSC1), SAVE_ITEM(S3C2440_GPJDAT), @@ -260,7 +260,7 @@ void __init s3c2440_init_clocks(int xtal) * as a driver which may support both 2410 and 2440 may try and use it. */ -int __init s3c2440_core_init(void) +static int __init s3c2440_core_init(void) { return sysdev_class_register(&s3c2440_sysclass); } From c8923c6b852d3a97c1faad0566e38fca330375a7 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 13 Oct 2005 14:41:23 -0700 Subject: [PATCH 56/59] [NETFILTER]: Fix OOPSes on machines with discontiguous cpu numbering. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Original patch by Harald Welte, with feedback from Herbert Xu and testing by Sébastien Bernard. EBTABLES, ARP tables, and IP/IP6 tables all assume that cpus are numbered linearly. That is not necessarily true. This patch fixes that up by calculating the largest possible cpu number, and allocating enough per-cpu structure space given that. Signed-off-by: David S. Miller --- arch/cris/arch-v32/kernel/smp.c | 2 ++ arch/sh/kernel/smp.c | 3 +++ include/linux/cpumask.h | 12 ++++++++++++ net/bridge/netfilter/ebtables.c | 27 +++++++++++++++++---------- net/ipv4/netfilter/arp_tables.c | 14 +++++++++----- net/ipv4/netfilter/ip_tables.c | 17 +++++++++++------ net/ipv6/netfilter/ip6_tables.c | 16 +++++++++++----- 7 files changed, 65 insertions(+), 26 deletions(-) diff --git a/arch/cris/arch-v32/kernel/smp.c b/arch/cris/arch-v32/kernel/smp.c index 2c5cae04a95c..957f551ba5ce 100644 --- a/arch/cris/arch-v32/kernel/smp.c +++ b/arch/cris/arch-v32/kernel/smp.c @@ -15,6 +15,7 @@ #include #include #include +#include #define IPI_SCHEDULE 1 #define IPI_CALL 2 @@ -28,6 +29,7 @@ spinlock_t cris_atomic_locks[] = { [0 ... LOCK_COUNT - 1] = SPIN_LOCK_UNLOCKED}; /* CPU masks */ cpumask_t cpu_online_map = CPU_MASK_NONE; cpumask_t phys_cpu_present_map = CPU_MASK_NONE; +EXPORT_SYMBOL(phys_cpu_present_map); /* Variables used during SMP boot */ volatile int cpu_now_booting = 0; diff --git a/arch/sh/kernel/smp.c b/arch/sh/kernel/smp.c index 56a39d69e080..5ecefc02896a 100644 --- a/arch/sh/kernel/smp.c +++ b/arch/sh/kernel/smp.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include @@ -39,6 +40,8 @@ struct sh_cpuinfo cpu_data[NR_CPUS]; extern void per_cpu_trap_init(void); cpumask_t cpu_possible_map; +EXPORT_SYMBOL(cpu_possible_map); + cpumask_t cpu_online_map; static atomic_t cpus_booted = ATOMIC_INIT(0); diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index b15826f6e3a2..fe9778301d07 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -392,4 +392,16 @@ extern cpumask_t cpu_present_map; #define for_each_online_cpu(cpu) for_each_cpu_mask((cpu), cpu_online_map) #define for_each_present_cpu(cpu) for_each_cpu_mask((cpu), cpu_present_map) +/* Find the highest possible smp_processor_id() */ +static inline unsigned int highest_possible_processor_id(void) +{ + unsigned int cpu, highest = 0; + + for_each_cpu_mask(cpu, cpu_possible_map) + highest = cpu; + + return highest; +} + + #endif /* __LINUX_CPUMASK_H */ diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index c4540144f0f4..f8ffbf6e2333 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -26,6 +26,7 @@ #include #include #include +#include #include /* needed for logical [in,out]-dev filtering */ #include "../br_private.h" @@ -823,10 +824,11 @@ static int translate_table(struct ebt_replace *repl, /* this will get free'd in do_replace()/ebt_register_table() if an error occurs */ newinfo->chainstack = (struct ebt_chainstack **) - vmalloc(num_possible_cpus() * sizeof(struct ebt_chainstack)); + vmalloc((highest_possible_processor_id()+1) + * sizeof(struct ebt_chainstack)); if (!newinfo->chainstack) return -ENOMEM; - for (i = 0; i < num_possible_cpus(); i++) { + for_each_cpu(i) { newinfo->chainstack[i] = vmalloc(udc_cnt * sizeof(struct ebt_chainstack)); if (!newinfo->chainstack[i]) { @@ -895,9 +897,12 @@ static void get_counters(struct ebt_counter *oldcounters, /* counters of cpu 0 */ memcpy(counters, oldcounters, - sizeof(struct ebt_counter) * nentries); + sizeof(struct ebt_counter) * nentries); + /* add other counters to those of cpu 0 */ - for (cpu = 1; cpu < num_possible_cpus(); cpu++) { + for_each_cpu(cpu) { + if (cpu == 0) + continue; counter_base = COUNTER_BASE(oldcounters, nentries, cpu); for (i = 0; i < nentries; i++) { counters[i].pcnt += counter_base[i].pcnt; @@ -929,7 +934,8 @@ static int do_replace(void __user *user, unsigned int len) BUGPRINT("Entries_size never zero\n"); return -EINVAL; } - countersize = COUNTER_OFFSET(tmp.nentries) * num_possible_cpus(); + countersize = COUNTER_OFFSET(tmp.nentries) * + (highest_possible_processor_id()+1); newinfo = (struct ebt_table_info *) vmalloc(sizeof(struct ebt_table_info) + countersize); if (!newinfo) @@ -1022,7 +1028,7 @@ static int do_replace(void __user *user, unsigned int len) vfree(table->entries); if (table->chainstack) { - for (i = 0; i < num_possible_cpus(); i++) + for_each_cpu(i) vfree(table->chainstack[i]); vfree(table->chainstack); } @@ -1040,7 +1046,7 @@ free_counterstmp: vfree(counterstmp); /* can be initialized in translate_table() */ if (newinfo->chainstack) { - for (i = 0; i < num_possible_cpus(); i++) + for_each_cpu(i) vfree(newinfo->chainstack[i]); vfree(newinfo->chainstack); } @@ -1132,7 +1138,8 @@ int ebt_register_table(struct ebt_table *table) return -EINVAL; } - countersize = COUNTER_OFFSET(table->table->nentries) * num_possible_cpus(); + countersize = COUNTER_OFFSET(table->table->nentries) * + (highest_possible_processor_id()+1); newinfo = (struct ebt_table_info *) vmalloc(sizeof(struct ebt_table_info) + countersize); ret = -ENOMEM; @@ -1186,7 +1193,7 @@ free_unlock: up(&ebt_mutex); free_chainstack: if (newinfo->chainstack) { - for (i = 0; i < num_possible_cpus(); i++) + for_each_cpu(i) vfree(newinfo->chainstack[i]); vfree(newinfo->chainstack); } @@ -1209,7 +1216,7 @@ void ebt_unregister_table(struct ebt_table *table) up(&ebt_mutex); vfree(table->private->entries); if (table->private->chainstack) { - for (i = 0; i < num_possible_cpus(); i++) + for_each_cpu(i) vfree(table->private->chainstack[i]); vfree(table->private->chainstack); } diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index fa1634256680..a7969286e6e7 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -716,8 +716,10 @@ static int translate_table(const char *name, } /* And one copy for every other CPU */ - for (i = 1; i < num_possible_cpus(); i++) { - memcpy(newinfo->entries + SMP_ALIGN(newinfo->size)*i, + for_each_cpu(i) { + if (i == 0) + continue; + memcpy(newinfo->entries + SMP_ALIGN(newinfo->size) * i, newinfo->entries, SMP_ALIGN(newinfo->size)); } @@ -767,7 +769,7 @@ static void get_counters(const struct arpt_table_info *t, unsigned int cpu; unsigned int i; - for (cpu = 0; cpu < num_possible_cpus(); cpu++) { + for_each_cpu(cpu) { i = 0; ARPT_ENTRY_ITERATE(t->entries + TABLE_OFFSET(t, cpu), t->size, @@ -885,7 +887,8 @@ static int do_replace(void __user *user, unsigned int len) return -ENOMEM; newinfo = vmalloc(sizeof(struct arpt_table_info) - + SMP_ALIGN(tmp.size) * num_possible_cpus()); + + SMP_ALIGN(tmp.size) * + (highest_possible_processor_id()+1)); if (!newinfo) return -ENOMEM; @@ -1158,7 +1161,8 @@ int arpt_register_table(struct arpt_table *table, = { 0, 0, 0, { 0 }, { 0 }, { } }; newinfo = vmalloc(sizeof(struct arpt_table_info) - + SMP_ALIGN(repl->size) * num_possible_cpus()); + + SMP_ALIGN(repl->size) * + (highest_possible_processor_id()+1)); if (!newinfo) { ret = -ENOMEM; return ret; diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index eef99a1b5de6..75c27e92f6ab 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -27,6 +27,7 @@ #include #include #include +#include #include @@ -921,8 +922,10 @@ translate_table(const char *name, } /* And one copy for every other CPU */ - for (i = 1; i < num_possible_cpus(); i++) { - memcpy(newinfo->entries + SMP_ALIGN(newinfo->size)*i, + for_each_cpu(i) { + if (i == 0) + continue; + memcpy(newinfo->entries + SMP_ALIGN(newinfo->size) * i, newinfo->entries, SMP_ALIGN(newinfo->size)); } @@ -943,7 +946,7 @@ replace_table(struct ipt_table *table, struct ipt_entry *table_base; unsigned int i; - for (i = 0; i < num_possible_cpus(); i++) { + for_each_cpu(i) { table_base = (void *)newinfo->entries + TABLE_OFFSET(newinfo, i); @@ -990,7 +993,7 @@ get_counters(const struct ipt_table_info *t, unsigned int cpu; unsigned int i; - for (cpu = 0; cpu < num_possible_cpus(); cpu++) { + for_each_cpu(cpu) { i = 0; IPT_ENTRY_ITERATE(t->entries + TABLE_OFFSET(t, cpu), t->size, @@ -1128,7 +1131,8 @@ do_replace(void __user *user, unsigned int len) return -ENOMEM; newinfo = vmalloc(sizeof(struct ipt_table_info) - + SMP_ALIGN(tmp.size) * num_possible_cpus()); + + SMP_ALIGN(tmp.size) * + (highest_possible_processor_id()+1)); if (!newinfo) return -ENOMEM; @@ -1458,7 +1462,8 @@ int ipt_register_table(struct ipt_table *table, const struct ipt_replace *repl) = { 0, 0, 0, { 0 }, { 0 }, { } }; newinfo = vmalloc(sizeof(struct ipt_table_info) - + SMP_ALIGN(repl->size) * num_possible_cpus()); + + SMP_ALIGN(repl->size) * + (highest_possible_processor_id()+1)); if (!newinfo) return -ENOMEM; diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 2da514b16d95..b03e90649eb5 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -28,6 +28,7 @@ #include #include #include +#include #include @@ -950,8 +951,10 @@ translate_table(const char *name, } /* And one copy for every other CPU */ - for (i = 1; i < num_possible_cpus(); i++) { - memcpy(newinfo->entries + SMP_ALIGN(newinfo->size)*i, + for_each_cpu(i) { + if (i == 0) + continue; + memcpy(newinfo->entries + SMP_ALIGN(newinfo->size) * i, newinfo->entries, SMP_ALIGN(newinfo->size)); } @@ -973,6 +976,7 @@ replace_table(struct ip6t_table *table, unsigned int i; for (i = 0; i < num_possible_cpus(); i++) { + for_each_cpu(i) { table_base = (void *)newinfo->entries + TABLE_OFFSET(newinfo, i); @@ -1019,7 +1023,7 @@ get_counters(const struct ip6t_table_info *t, unsigned int cpu; unsigned int i; - for (cpu = 0; cpu < num_possible_cpus(); cpu++) { + for_each_cpu(cpu) { i = 0; IP6T_ENTRY_ITERATE(t->entries + TABLE_OFFSET(t, cpu), t->size, @@ -1153,7 +1157,8 @@ do_replace(void __user *user, unsigned int len) return -ENOMEM; newinfo = vmalloc(sizeof(struct ip6t_table_info) - + SMP_ALIGN(tmp.size) * num_possible_cpus()); + + SMP_ALIGN(tmp.size) * + (highest_possible_processor_id()+1)); if (!newinfo) return -ENOMEM; @@ -1467,7 +1472,8 @@ int ip6t_register_table(struct ip6t_table *table, = { 0, 0, 0, { 0 }, { 0 }, { } }; newinfo = vmalloc(sizeof(struct ip6t_table_info) - + SMP_ALIGN(repl->size) * num_possible_cpus()); + + SMP_ALIGN(repl->size) * + (highest_possible_processor_id()+1)); if (!newinfo) return -ENOMEM; From 34cb711ba922f53cca45443b8c3c1078873cf599 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 13 Oct 2005 14:41:44 -0700 Subject: [PATCH 57/59] [NET]: Disable NET_SCH_CLK_CPU for SMP x86 hosts Opterons with frequency scaling have fully unsynchronized TSCs running at different frequencies, so using TSCs there is not a good idea. Also some other x86 boxes have this problem. gettimeofday should be good enough, so just disable it. Signed-off-by: Andi Kleen Signed-off-by: David S. Miller --- net/sched/Kconfig | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/sched/Kconfig b/net/sched/Kconfig index 45d3bc0812c8..81510da31792 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -72,9 +72,11 @@ config NET_SCH_CLK_GETTIMEOFDAY Choose this if you need a high resolution clock source but can't use the CPU's cycle counter. +# don't allow on SMP x86 because they can have unsynchronized TSCs. +# gettimeofday is a good alternative config NET_SCH_CLK_CPU bool "CPU cycle counter" - depends on X86_TSC || X86_64 || ALPHA || SPARC64 || PPC64 || IA64 + depends on ((X86_TSC || X86_64) && !SMP) || ALPHA || SPARC64 || PPC64 || IA64 help Say Y here if you want to use the CPU's cycle counter as clock source. This is a cheap and high resolution clock source, but on some From eb0d6041143fae63410c5622fef96862e6b20933 Mon Sep 17 00:00:00 2001 From: Evgeniy Polyakov Date: Thu, 13 Oct 2005 14:42:04 -0700 Subject: [PATCH 58/59] [CONNECTOR]: Update documentation to match reality. Updated documentation to reflect 2.6.14 netlink changes about socket options, multicasting and group number. Please concider for 2.6.14. Signed-off-by: Evgeniy Polyakov Signed-off-by: David S. Miller --- Documentation/connector/connector.txt | 44 +++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/Documentation/connector/connector.txt b/Documentation/connector/connector.txt index 54a0a14bfbe3..57a314b14cf8 100644 --- a/Documentation/connector/connector.txt +++ b/Documentation/connector/connector.txt @@ -131,3 +131,47 @@ Netlink itself is not reliable protocol, that means that messages can be lost due to memory pressure or process' receiving queue overflowed, so caller is warned must be prepared. That is why struct cn_msg [main connector's message header] contains u32 seq and u32 ack fields. + +/*****************************************/ +Userspace usage. +/*****************************************/ +2.6.14 has a new netlink socket implementation, which by default does not +allow to send data to netlink groups other than 1. +So, if to use netlink socket (for example using connector) +with different group number userspace application must subscribe to +that group. It can be achieved by following pseudocode: + +s = socket(PF_NETLINK, SOCK_DGRAM, NETLINK_CONNECTOR); + +l_local.nl_family = AF_NETLINK; +l_local.nl_groups = 12345; +l_local.nl_pid = 0; + +if (bind(s, (struct sockaddr *)&l_local, sizeof(struct sockaddr_nl)) == -1) { + perror("bind"); + close(s); + return -1; +} + +{ + int on = l_local.nl_groups; + setsockopt(s, 270, 1, &on, sizeof(on)); +} + +Where 270 above is SOL_NETLINK, and 1 is a NETLINK_ADD_MEMBERSHIP socket +option. To drop multicast subscription one should call above socket option +with NETLINK_DROP_MEMBERSHIP parameter which is defined as 0. + +2.6.14 netlink code only allows to select a group which is less or equal to +the maximum group number, which is used at netlink_kernel_create() time. +In case of connector it is CN_NETLINK_USERS + 0xf, so if you want to use +group number 12345, you must increment CN_NETLINK_USERS to that number. +Additional 0xf numbers are allocated to be used by non-in-kernel users. + +Due to this limitation, group 0xffffffff does not work now, so one can +not use add/remove connector's group notifications, but as far as I know, +only cn_test.c test module used it. + +Some work in netlink area is still being done, so things can be changed in +2.6.15 timeframe, if it will happen, documentation will be updated for that +kernel. From 046d20b73960b7a2474b6d5e920d54c3fd7c23fe Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 13 Oct 2005 14:42:24 -0700 Subject: [PATCH 59/59] [TCP]: Ratelimit debugging warning. Better safe than sorry. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/ipv4/tcp_output.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index f37a50e55b68..7114031fdc70 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -436,11 +436,13 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned int mss u16 flags; if (unlikely(len >= skb->len)) { - printk(KERN_DEBUG "TCP: seg_size=%u, mss=%u, seq=%u, " - "end_seq=%u, skb->len=%u.\n", len, mss_now, - TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq, - skb->len); - WARN_ON(1); + if (net_ratelimit()) { + printk(KERN_DEBUG "TCP: seg_size=%u, mss=%u, seq=%u, " + "end_seq=%u, skb->len=%u.\n", len, mss_now, + TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq, + skb->len); + WARN_ON(1); + } return 0; }