Simon Horman says:

====================
IPVS Updates for v4.18

please consider these IPVS enhancements for v4.18.

* Whitespace cleanup

* Add Maglev hashing algorithm as an IPVS scheduler

  Inju Song says "Implements the Google's Maglev hashing algorithm as a
  IPVS scheduler.  Basically it provides consistent hashing but offers some
  special features about disruption and load balancing.

  1) minimal disruption: when the set of destinations changes,
     a connection will likely be sent to the same destination
     as it was before.

  2) load balancing: each destination will receive an almost
     equal number of connections.

 See also: [3.4 Consistent Hashing] in
 https://www.usenix.org/system/files/conference/nsdi16/nsdi16-paper-eisenbud.pdf
 "

* Fix to correct implementation of Knuth's multiplicative hashing
  which is used in sh/dh/lblc/lblcr algorithms. Instead the
  implementation provided by the hash_32() macro is used.
====================

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
This commit is contained in:
Pablo Neira Ayuso 2018-04-27 00:16:14 +02:00
commit 146cd6b5d5
10 changed files with 593 additions and 6 deletions

View File

@ -668,6 +668,7 @@ struct ip_vs_dest {
volatile unsigned int flags; /* dest status flags */ volatile unsigned int flags; /* dest status flags */
atomic_t conn_flags; /* flags to copy to conn */ atomic_t conn_flags; /* flags to copy to conn */
atomic_t weight; /* server weight */ atomic_t weight; /* server weight */
atomic_t last_weight; /* server latest weight */
refcount_t refcnt; /* reference counter */ refcount_t refcnt; /* reference counter */
struct ip_vs_stats stats; /* statistics */ struct ip_vs_stats stats; /* statistics */

View File

@ -225,6 +225,25 @@ config IP_VS_SH
If you want to compile it in kernel, say Y. To compile it as a If you want to compile it in kernel, say Y. To compile it as a
module, choose M here. If unsure, say N. module, choose M here. If unsure, say N.
config IP_VS_MH
tristate "maglev hashing scheduling"
---help---
The maglev consistent hashing scheduling algorithm provides the
Google's Maglev hashing algorithm as an IPVS scheduler. It assigns
network connections to the servers through looking up a statically
assigned special hash table called the lookup table. Maglev hashing
is to assign a preference list of all the lookup table positions
to each destination.
Through this operation, the maglev hashing gives an almost equal
share of the lookup table to each of the destinations and provides
minimal disruption by using the lookup table. When the set of
destinations changes, a connection will likely be sent to the same
destination as it was before.
If you want to compile it in kernel, say Y. To compile it as a
module, choose M here. If unsure, say N.
config IP_VS_SED config IP_VS_SED
tristate "shortest expected delay scheduling" tristate "shortest expected delay scheduling"
---help--- ---help---
@ -266,6 +285,24 @@ config IP_VS_SH_TAB_BITS
needs to be large enough to effectively fit all the destinations needs to be large enough to effectively fit all the destinations
multiplied by their respective weights. multiplied by their respective weights.
comment 'IPVS MH scheduler'
config IP_VS_MH_TAB_INDEX
int "IPVS maglev hashing table index of size (the prime numbers)"
range 8 17
default 12
---help---
The maglev hashing scheduler maps source IPs to destinations
stored in a hash table. This table is assigned by a preference
list of the positions to each destination until all slots in
the table are filled. The index determines the prime for size of
the table as 251, 509, 1021, 2039, 4093, 8191, 16381, 32749,
65521 or 131071. When using weights to allow destinations to
receive more connections, the table is assigned an amount
proportional to the weights specified. The table needs to be large
enough to effectively fit all the destinations multiplied by their
respective weights.
comment 'IPVS application helper' comment 'IPVS application helper'
config IP_VS_FTP config IP_VS_FTP

View File

@ -33,6 +33,7 @@ obj-$(CONFIG_IP_VS_LBLC) += ip_vs_lblc.o
obj-$(CONFIG_IP_VS_LBLCR) += ip_vs_lblcr.o obj-$(CONFIG_IP_VS_LBLCR) += ip_vs_lblcr.o
obj-$(CONFIG_IP_VS_DH) += ip_vs_dh.o obj-$(CONFIG_IP_VS_DH) += ip_vs_dh.o
obj-$(CONFIG_IP_VS_SH) += ip_vs_sh.o obj-$(CONFIG_IP_VS_SH) += ip_vs_sh.o
obj-$(CONFIG_IP_VS_MH) += ip_vs_mh.o
obj-$(CONFIG_IP_VS_SED) += ip_vs_sed.o obj-$(CONFIG_IP_VS_SED) += ip_vs_sed.o
obj-$(CONFIG_IP_VS_NQ) += ip_vs_nq.o obj-$(CONFIG_IP_VS_NQ) += ip_vs_nq.o

View File

@ -821,6 +821,10 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
if (add && udest->af != svc->af) if (add && udest->af != svc->af)
ipvs->mixed_address_family_dests++; ipvs->mixed_address_family_dests++;
/* keep the last_weight with latest non-0 weight */
if (add || udest->weight != 0)
atomic_set(&dest->last_weight, udest->weight);
/* set the weight and the flags */ /* set the weight and the flags */
atomic_set(&dest->weight, udest->weight); atomic_set(&dest->weight, udest->weight);
conn_flags = udest->conn_flags & IP_VS_CONN_F_DEST_MASK; conn_flags = udest->conn_flags & IP_VS_CONN_F_DEST_MASK;

View File

@ -43,6 +43,7 @@
#include <linux/module.h> #include <linux/module.h>
#include <linux/kernel.h> #include <linux/kernel.h>
#include <linux/skbuff.h> #include <linux/skbuff.h>
#include <linux/hash.h>
#include <net/ip_vs.h> #include <net/ip_vs.h>
@ -81,7 +82,7 @@ static inline unsigned int ip_vs_dh_hashkey(int af, const union nf_inet_addr *ad
addr_fold = addr->ip6[0]^addr->ip6[1]^ addr_fold = addr->ip6[0]^addr->ip6[1]^
addr->ip6[2]^addr->ip6[3]; addr->ip6[2]^addr->ip6[3];
#endif #endif
return (ntohl(addr_fold)*2654435761UL) & IP_VS_DH_TAB_MASK; return hash_32(ntohl(addr_fold), IP_VS_DH_TAB_BITS);
} }

View File

@ -48,6 +48,7 @@
#include <linux/kernel.h> #include <linux/kernel.h>
#include <linux/skbuff.h> #include <linux/skbuff.h>
#include <linux/jiffies.h> #include <linux/jiffies.h>
#include <linux/hash.h>
/* for sysctl */ /* for sysctl */
#include <linux/fs.h> #include <linux/fs.h>
@ -160,7 +161,7 @@ ip_vs_lblc_hashkey(int af, const union nf_inet_addr *addr)
addr_fold = addr->ip6[0]^addr->ip6[1]^ addr_fold = addr->ip6[0]^addr->ip6[1]^
addr->ip6[2]^addr->ip6[3]; addr->ip6[2]^addr->ip6[3];
#endif #endif
return (ntohl(addr_fold)*2654435761UL) & IP_VS_LBLC_TAB_MASK; return hash_32(ntohl(addr_fold), IP_VS_LBLC_TAB_BITS);
} }

View File

@ -47,6 +47,7 @@
#include <linux/jiffies.h> #include <linux/jiffies.h>
#include <linux/list.h> #include <linux/list.h>
#include <linux/slab.h> #include <linux/slab.h>
#include <linux/hash.h>
/* for sysctl */ /* for sysctl */
#include <linux/fs.h> #include <linux/fs.h>
@ -323,7 +324,7 @@ ip_vs_lblcr_hashkey(int af, const union nf_inet_addr *addr)
addr_fold = addr->ip6[0]^addr->ip6[1]^ addr_fold = addr->ip6[0]^addr->ip6[1]^
addr->ip6[2]^addr->ip6[3]; addr->ip6[2]^addr->ip6[3];
#endif #endif
return (ntohl(addr_fold)*2654435761UL) & IP_VS_LBLCR_TAB_MASK; return hash_32(ntohl(addr_fold), IP_VS_LBLCR_TAB_BITS);
} }

View File

@ -0,0 +1,540 @@
// SPDX-License-Identifier: GPL-2.0
/* IPVS: Maglev Hashing scheduling module
*
* Authors: Inju Song <inju.song@navercorp.com>
*
*/
/* The mh algorithm assigns a preference list of all the lookup
* table positions to each destination and populates the table with
* the most-preferred position of each destination. It then selects
* a destination by looking up the lookup table with the hash key
* of the source IP address.
*
* The algorithm is detailed in:
* [3.4 Consistent Hashing]
* https://www.usenix.org/system/files/conference/nsdi16/nsdi16-paper-eisenbud.pdf
*
*/
#define KMSG_COMPONENT "IPVS"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
#include <linux/ip.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <net/ip_vs.h>
#include <linux/siphash.h>
#include <linux/bitops.h>
#include <linux/gcd.h>
/* Per-service scheduler flags as interpreted by ip_vs_mh_schedule():
* - MH_FALLBACK: pick via ip_vs_mh_get_fallback() instead of ip_vs_mh_get(),
*   i.e. keep probing the table when the first choice is unavailable.
* - MH_PORT: feed the packet's port into the hash in addition to the address.
*/
#define IP_VS_SVC_F_SCHED_MH_FALLBACK IP_VS_SVC_F_SCHED1 /* MH fallback */
#define IP_VS_SVC_F_SCHED_MH_PORT IP_VS_SVC_F_SCHED2 /* MH use port */
/* One slot of the MH lookup table. The slot holds a reference on the
* destination it points at (taken in ip_vs_mh_populate(), dropped in
* ip_vs_mh_reset() or when the slot is reassigned).
*/
struct ip_vs_mh_lookup {
struct ip_vs_dest __rcu *dest; /* real server (cache) */
};
/* Per-destination scratch state used only while the lookup table is being
* rebuilt; filled in by ip_vs_mh_permutate() and consumed by
* ip_vs_mh_populate().
*/
struct ip_vs_mh_dest_setup {
unsigned int offset; /* first preferred slot: hash1 of the dest, mod table size */
unsigned int skip; /* step between preferred slots: hash2 mod (size - 1), plus 1 */
unsigned int perm; /* current candidate slot; starts at offset, advanced by skip */
int turns; /* slots claimed per visit: (last_weight / gcd) >> rshift, or 1 if that is 0 but last_weight != 0 */
};
/* Candidate sizes for the MH lookup table, indexed by
 * IP_VS_MH_TAB_INDEX (CONFIG_IP_VS_MH_TAB_INDEX - 8). The table is only
 * ever read, so it is declared const to keep it out of writable data and
 * to reject accidental stores at compile time.
 */
static const int primes[] = {251, 509, 1021, 2039, 4093,
			     8191, 16381, 32749, 65521, 131071};
/* Sizing of the MH lookup table.
*
* CONFIG_IP_VS_MH_TAB_INDEX falls back to 12 when the build does not set it.
* IP_VS_MH_TAB_BITS  - half of the configured index; used by
*                      ip_vs_mh_shift_weight() as the bit budget for
*                      weight / gcd before right-shifting kicks in.
* IP_VS_MH_TAB_INDEX - position in primes[] (configured index minus 8).
* IP_VS_MH_TAB_SIZE  - number of slots in the lookup table.
*/
#ifndef CONFIG_IP_VS_MH_TAB_INDEX
#define CONFIG_IP_VS_MH_TAB_INDEX 12
#endif
#define IP_VS_MH_TAB_BITS (CONFIG_IP_VS_MH_TAB_INDEX / 2)
#define IP_VS_MH_TAB_INDEX (CONFIG_IP_VS_MH_TAB_INDEX - 8)
#define IP_VS_MH_TAB_SIZE primes[IP_VS_MH_TAB_INDEX]
/* Scheduler state attached to a service through svc->sched_data. */
struct ip_vs_mh_state {
/* used by ip_vs_mh_done_svc() to free the state via call_rcu() */
struct rcu_head rcu_head;
/* array of IP_VS_MH_TAB_SIZE slots, allocated in ip_vs_mh_init_svc() */
struct ip_vs_mh_lookup *lookup;
/* per-dest scratch array; only valid inside ip_vs_mh_reassign() */
struct ip_vs_mh_dest_setup *dest_setup;
/* hash1 keys offset and lookup hashing, hash2 keys the skip value */
hsiphash_key_t hash1, hash2;
/* gcd of all positive last_weight values, 0 if there are none */
int gcd;
/* right shift applied to last_weight / gcd to bound per-dest turns */
int rshift;
};
/* Load the two hsiphash keys with this scheduler's fixed constants.
 * Both words of a key carry the same value.
 */
static inline void generate_hash_secret(hsiphash_key_t *hash1,
					hsiphash_key_t *hash2)
{
	hash1->key[0] = hash1->key[1] = 2654435761UL;
	hash2->key[0] = hash2->key[1] = 2654446892UL;
}
/* Helper function to determine if server is unavailable */
static inline bool is_unavailable(struct ip_vs_dest *dest)
{
return atomic_read(&dest->weight) <= 0 ||
dest->flags & IP_VS_DEST_F_OVERLOAD;
}
/* Hash an (address, port, offset) triple with the given hsiphash key.
 *
 * IPv6 addresses are first folded to 32 bits by XOR-ing their four words.
 * The caller reduces the returned value modulo the table size.
 */
static inline unsigned int
ip_vs_mh_hashkey(int af, const union nf_inet_addr *addr,
		 __be16 port, hsiphash_key_t *key, unsigned int offset)
{
	__be32 folded = addr->ip;
	unsigned int seed;

#ifdef CONFIG_IP_VS_IPV6
	if (af == AF_INET6) {
		folded = addr->ip6[0];
		folded ^= addr->ip6[1];
		folded ^= addr->ip6[2];
		folded ^= addr->ip6[3];
	}
#endif
	seed = offset;
	seed += ntohs(port);
	seed += ntohl(folded);

	return hsiphash(&seed, sizeof(seed), key);
}
/* Empty every slot of the lookup table, dropping the reference each
 * occupied slot holds on its destination.
 */
static void ip_vs_mh_reset(struct ip_vs_mh_state *s)
{
	int slot;

	for (slot = 0; slot < IP_VS_MH_TAB_SIZE; slot++) {
		struct ip_vs_mh_lookup *entry = &s->lookup[slot];
		struct ip_vs_dest *held;

		held = rcu_dereference_protected(entry->dest, 1);
		if (!held)
			continue;

		ip_vs_dest_put(held);
		RCU_INIT_POINTER(entry->dest, NULL);
	}
}
static int ip_vs_mh_permutate(struct ip_vs_mh_state *s,
struct ip_vs_service *svc)
{
struct list_head *p;
struct ip_vs_mh_dest_setup *ds;
struct ip_vs_dest *dest;
int lw;
/* If gcd is smaller then 1, number of dests or
* all last_weight of dests are zero. So, skip
* permutation for the dests.
*/
if (s->gcd < 1)
return 0;
/* Set dest_setup for the dests permutation */
p = &svc->destinations;
ds = &s->dest_setup[0];
while ((p = p->next) != &svc->destinations) {
dest = list_entry(p, struct ip_vs_dest, n_list);
ds->offset = ip_vs_mh_hashkey(svc->af, &dest->addr,
dest->port, &s->hash1, 0) %
IP_VS_MH_TAB_SIZE;
ds->skip = ip_vs_mh_hashkey(svc->af, &dest->addr,
dest->port, &s->hash2, 0) %
(IP_VS_MH_TAB_SIZE - 1) + 1;
ds->perm = ds->offset;
lw = atomic_read(&dest->last_weight);
ds->turns = ((lw / s->gcd) >> s->rshift) ? : (lw != 0);
ds++;
}
return 0;
}
/* Fill every slot of the lookup table from the per-destination setup
* prepared by ip_vs_mh_permutate().
*
* Destinations are visited in list order, round after round. On each visit
* a destination claims up to ds->turns slots; every claim takes the next
* still-free position of its own offset/skip sequence. The loop ends as
* soon as all IP_VS_MH_TAB_SIZE slots are taken.
*
* Returns 0 on success (including the "nothing to schedule" case, where the
* table is simply reset) or -ENOMEM if the scratch bitmap cannot be
* allocated.
*/
static int ip_vs_mh_populate(struct ip_vs_mh_state *s,
struct ip_vs_service *svc)
{
/* n: slots filled so far, c: slot being claimed,
* dt_count: slots claimed by the current dest during this visit
*/
int n, c, dt_count;
/* bitmap with one bit per slot, set once the slot has been claimed */
unsigned long *table;
struct list_head *p;
struct ip_vs_mh_dest_setup *ds;
struct ip_vs_dest *dest, *new_dest;
/* If gcd is smaller than 1, there are no dests or
* all last_weight of dests are zero. So, skip
* the population for the dests and reset lookup table.
*/
if (s->gcd < 1) {
ip_vs_mh_reset(s);
return 0;
}
table = kcalloc(BITS_TO_LONGS(IP_VS_MH_TAB_SIZE),
sizeof(unsigned long), GFP_KERNEL);
if (!table)
return -ENOMEM;
p = &svc->destinations;
n = 0;
dt_count = 0;
/* Each pass of the outer loop is one full round over the dest list */
while (n < IP_VS_MH_TAB_SIZE) {
/* p is back at the list head here: restart at the first dest
* and at the first dest_setup entry, which are kept in step
*/
if (p == &svc->destinations)
p = p->next;
ds = &s->dest_setup[0];
while (p != &svc->destinations) {
/* Ignore added server with zero weight */
if (ds->turns < 1) {
p = p->next;
ds++;
continue;
}
/* Walk this dest's sequence until a free slot is found */
c = ds->perm;
while (test_bit(c, table)) {
/* Add skip, mod IP_VS_MH_TAB_SIZE */
ds->perm += ds->skip;
if (ds->perm >= IP_VS_MH_TAB_SIZE)
ds->perm -= IP_VS_MH_TAB_SIZE;
c = ds->perm;
}
__set_bit(c, table);
/* Swap the slot's dest only if it changed, moving the
* reference from the old dest to the new one
*/
dest = rcu_dereference_protected(s->lookup[c].dest, 1);
new_dest = list_entry(p, struct ip_vs_dest, n_list);
if (dest != new_dest) {
if (dest)
ip_vs_dest_put(dest);
ip_vs_dest_hold(new_dest);
RCU_INIT_POINTER(s->lookup[c].dest, new_dest);
}
if (++n == IP_VS_MH_TAB_SIZE)
goto out;
/* Move on once this dest has used up its turns */
if (++dt_count >= ds->turns) {
dt_count = 0;
p = p->next;
ds++;
}
}
}
out:
kfree(table);
return 0;
}
/* Look up the destination for (addr, port) in the MH table.
 * Returns NULL when the slot is empty or its destination is unavailable.
 */
static inline struct ip_vs_dest *
ip_vs_mh_get(struct ip_vs_service *svc, struct ip_vs_mh_state *s,
	     const union nf_inet_addr *addr, __be16 port)
{
	struct ip_vs_dest *dest;
	unsigned int slot;

	slot = ip_vs_mh_hashkey(svc->af, addr, port, &s->hash1, 0);
	slot %= IP_VS_MH_TAB_SIZE;

	dest = rcu_dereference(s->lookup[slot].dest);
	if (!dest || is_unavailable(dest))
		return NULL;

	return dest;
}
/* Same lookup as ip_vs_mh_get(), but when the first choice is unavailable
 * keep probing: re-hash with successive offsets (starting from the first
 * slot index) until an available destination turns up. Gives up with NULL
 * on the first empty slot or after IP_VS_MH_TAB_SIZE probes.
 */
static inline struct ip_vs_dest *
ip_vs_mh_get_fallback(struct ip_vs_service *svc, struct ip_vs_mh_state *s,
		      const union nf_inet_addr *addr, __be16 port)
{
	struct ip_vs_dest *dest;
	unsigned int home, step, probe, slot;

	/* Preferred slot for this address/port */
	home = ip_vs_mh_hashkey(svc->af, addr, port, &s->hash1, 0) %
	       IP_VS_MH_TAB_SIZE;
	dest = rcu_dereference(s->lookup[home].dest);
	if (!dest)
		return NULL;
	if (!is_unavailable(dest))
		return dest;

	IP_VS_DBG_BUF(6, "MH: selected unavailable server %s:%u, reselecting",
		      IP_VS_DBG_ADDR(dest->af, &dest->addr), ntohs(dest->port));

	/* Preferred dest is out: try re-hashed slots, one offset at a time */
	for (step = 0; step < IP_VS_MH_TAB_SIZE; step++) {
		probe = (step + home) % IP_VS_MH_TAB_SIZE;
		slot = ip_vs_mh_hashkey(svc->af, addr, port, &s->hash1,
					probe) % IP_VS_MH_TAB_SIZE;
		dest = rcu_dereference(s->lookup[slot].dest);
		if (!dest)
			return NULL;
		if (!is_unavailable(dest))
			return dest;

		IP_VS_DBG_BUF(6,
			      "MH: selected unavailable server %s:%u (offset %u), reselecting",
			      IP_VS_DBG_ADDR(dest->af, &dest->addr),
			      ntohs(dest->port), probe);
	}

	return NULL;
}
/* Assign all the hash buckets of the specified table with the service. */
static int ip_vs_mh_reassign(struct ip_vs_mh_state *s,
struct ip_vs_service *svc)
{
int ret;
if (svc->num_dests > IP_VS_MH_TAB_SIZE)
return -EINVAL;
if (svc->num_dests >= 1) {
s->dest_setup = kcalloc(svc->num_dests,
sizeof(struct ip_vs_mh_dest_setup),
GFP_KERNEL);
if (!s->dest_setup)
return -ENOMEM;
}
ip_vs_mh_permutate(s, svc);
ret = ip_vs_mh_populate(s, svc);
if (ret < 0)
goto out;
IP_VS_DBG_BUF(6, "MH: reassign lookup table of %s:%u\n",
IP_VS_DBG_ADDR(svc->af, &svc->addr),
ntohs(svc->port));
out:
if (svc->num_dests >= 1) {
kfree(s->dest_setup);
s->dest_setup = NULL;
}
return ret;
}
/* Greatest common divisor of all positive last_weight values of the
 * service's destinations; 0 when no destination has a positive one.
 */
static int ip_vs_mh_gcd_weight(struct ip_vs_service *svc)
{
	struct ip_vs_dest *dest;
	int acc = 0;
	int w;

	list_for_each_entry(dest, &svc->destinations, n_list) {
		w = atomic_read(&dest->last_weight);
		if (w <= 0)
			continue;

		if (acc <= 0)
			acc = w;	/* first positive weight seen */
		else
			acc = gcd(w, acc);
	}

	return acc;
}
/* Work out how far scaled weights must be right-shifted so that the
 * largest one (max last_weight / gcd) fits in IP_VS_MH_TAB_BITS bits,
 * which keeps a single destination from claiming a huge run of slots.
 * Returns 0 when no shift is needed or when gcd is below 1.
 */
static int ip_vs_mh_shift_weight(struct ip_vs_service *svc, int gcd)
{
	struct ip_vs_dest *dest;
	int heaviest = 0;
	int cur, excess;

	/* gcd < 1: no destinations, or all last_weight values are zero */
	if (gcd < 1)
		return 0;

	list_for_each_entry(dest, &svc->destinations, n_list) {
		cur = atomic_read(&dest->last_weight);
		if (cur > heaviest)
			heaviest = cur;
	}

	/* gcd > 0 here, so the division is safe.
	 * excess = bits occupied by max weight / gcd, minus the bit budget.
	 */
	excess = fls(heaviest / gcd) - IP_VS_MH_TAB_BITS;

	return excess < 0 ? 0 : excess;
}
/* Free a scheduler state and its lookup table, given the embedded
 * rcu_head. Used as the call_rcu() callback from ip_vs_mh_done_svc().
 */
static void ip_vs_mh_state_free(struct rcu_head *head)
{
	struct ip_vs_mh_state *state =
		container_of(head, struct ip_vs_mh_state, rcu_head);

	kfree(state->lookup);
	kfree(state);
}
/* Scheduler init hook: allocate the MH state and lookup table for a
 * service, build the table from its current destinations and attach the
 * state to svc->sched_data. Returns 0 or a negative errno; on failure
 * nothing is left allocated and svc->sched_data is not touched.
 */
static int ip_vs_mh_init_svc(struct ip_vs_service *svc)
{
	struct ip_vs_mh_state *state;
	int err;

	state = kzalloc(sizeof(*state), GFP_KERNEL);
	if (!state)
		return -ENOMEM;

	state->lookup = kcalloc(IP_VS_MH_TAB_SIZE,
				sizeof(struct ip_vs_mh_lookup), GFP_KERNEL);
	if (!state->lookup) {
		err = -ENOMEM;
		goto free_state;
	}

	generate_hash_secret(&state->hash1, &state->hash2);
	state->gcd = ip_vs_mh_gcd_weight(svc);
	state->rshift = ip_vs_mh_shift_weight(svc, state->gcd);

	IP_VS_DBG(6,
		  "MH lookup table (memory=%zdbytes) allocated for current service\n",
		  sizeof(struct ip_vs_mh_lookup) * IP_VS_MH_TAB_SIZE);

	/* Fill the table from the destinations already on the service */
	err = ip_vs_mh_reassign(state, svc);
	if (err < 0)
		goto drop_table;

	/* Nothing can fail from here on: publish the state */
	svc->sched_data = state;
	return 0;

drop_table:
	/* The state was never published, so it can be freed directly */
	ip_vs_mh_reset(state);
	kfree(state->lookup);
free_state:
	kfree(state);
	return err;
}
/* Scheduler teardown hook: drop all destination references held by the
 * lookup table, then hand the state to call_rcu() for freeing.
 */
static void ip_vs_mh_done_svc(struct ip_vs_service *svc)
{
	struct ip_vs_mh_state *state = svc->sched_data;

	/* Release the per-slot destination references first */
	ip_vs_mh_reset(state);

	call_rcu(&state->rcu_head, ip_vs_mh_state_free);

	IP_VS_DBG(6, "MH lookup table (memory=%zdbytes) released\n",
		  sizeof(struct ip_vs_mh_lookup) * IP_VS_MH_TAB_SIZE);
}
/* Shared add/del/upd destination hook: recompute the weight gcd and
 * shift, then rebuild the lookup table. The dest argument is unused;
 * the whole destination list is re-read. Returns what
 * ip_vs_mh_reassign() returns.
 */
static int ip_vs_mh_dest_changed(struct ip_vs_service *svc,
				 struct ip_vs_dest *dest)
{
	struct ip_vs_mh_state *state = svc->sched_data;
	int g = ip_vs_mh_gcd_weight(svc);

	state->gcd = g;
	state->rshift = ip_vs_mh_shift_weight(svc, g);

	return ip_vs_mh_reassign(state, svc);
}
/* Fetch the port to hash on from a TCP, UDP or SCTP packet.
 *
 * Returns the first port word of the transport header, or the second one
 * when the header is flagged as inverse. Returns 0 for any other protocol
 * or if the port words cannot be read.
 */
static inline __be16
ip_vs_mh_get_port(const struct sk_buff *skb, struct ip_vs_iphdr *iph)
{
	__be16 buf[2], *hdr;

	if (iph->protocol != IPPROTO_TCP &&
	    iph->protocol != IPPROTO_UDP &&
	    iph->protocol != IPPROTO_SCTP)
		return 0;

	/* The packet is known to be valid at this point. ICMP payloads
	 * are only guaranteed to carry the first 8 bytes, so read just
	 * the two port words; they sit at the same place for all three
	 * protocols handled here.
	 */
	hdr = skb_header_pointer(skb, iph->len, sizeof(buf), &buf);
	if (unlikely(!hdr))
		return 0;

	if (likely(!ip_vs_iph_inverse(iph)))
		return hdr[0];

	return hdr[1];
}
/* Maglev Hashing scheduling: pick the destination for a packet.
 *
 * The hash input is the source address (destination address for inverse
 * headers), plus the port when IP_VS_SVC_F_SCHED_MH_PORT is set. With
 * IP_VS_SVC_F_SCHED_MH_FALLBACK the lookup keeps probing past unavailable
 * destinations. Returns NULL, after reporting a scheduler error, when no
 * destination can be used.
 */
static struct ip_vs_dest *
ip_vs_mh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
		  struct ip_vs_iphdr *iph)
{
	const union nf_inet_addr *key_addr;
	struct ip_vs_mh_state *state;
	struct ip_vs_dest *picked;
	__be16 port = 0;

	if (ip_vs_iph_inverse(iph))
		key_addr = &iph->daddr;
	else
		key_addr = &iph->saddr;

	IP_VS_DBG(6, "%s : Scheduling...\n", __func__);

	if (svc->flags & IP_VS_SVC_F_SCHED_MH_PORT)
		port = ip_vs_mh_get_port(skb, iph);

	state = svc->sched_data;

	if (svc->flags & IP_VS_SVC_F_SCHED_MH_FALLBACK)
		picked = ip_vs_mh_get_fallback(svc, state, key_addr, port);
	else
		picked = ip_vs_mh_get(svc, state, key_addr, port);

	if (!picked) {
		ip_vs_scheduler_err(svc, "no destination available");
		return NULL;
	}

	IP_VS_DBG_BUF(6, "MH: source IP address %s:%u --> server %s:%u\n",
		      IP_VS_DBG_ADDR(svc->af, key_addr),
		      ntohs(port),
		      IP_VS_DBG_ADDR(picked->af, &picked->addr),
		      ntohs(picked->port));

	return picked;
}
/* IPVS MH Scheduler structure.
* Registered under the name "mh". Adding, deleting and updating a
* destination all go through the same hook, which rebuilds the whole
* lookup table.
*/
static struct ip_vs_scheduler ip_vs_mh_scheduler = {
.name = "mh",
.refcnt = ATOMIC_INIT(0),
.module = THIS_MODULE,
.n_list = LIST_HEAD_INIT(ip_vs_mh_scheduler.n_list),
.init_service = ip_vs_mh_init_svc,
.done_service = ip_vs_mh_done_svc,
.add_dest = ip_vs_mh_dest_changed,
.del_dest = ip_vs_mh_dest_changed,
.upd_dest = ip_vs_mh_dest_changed,
.schedule = ip_vs_mh_schedule,
};
/* Module load: register the "mh" scheduler with IPVS and pass the
 * registration result back to the module loader.
 */
static int __init ip_vs_mh_init(void)
{
	int err = register_ip_vs_scheduler(&ip_vs_mh_scheduler);

	return err;
}
/* Module unload: unregister the scheduler, then call rcu_barrier().
 * NOTE(review): the barrier presumably lets ip_vs_mh_state_free()
 * callbacks queued by ip_vs_mh_done_svc() finish before the module
 * goes away - confirm against the RCU documentation.
 */
static void __exit ip_vs_mh_cleanup(void)
{
	unregister_ip_vs_scheduler(&ip_vs_mh_scheduler);

	rcu_barrier();
}
/* Module entry/exit hooks and module metadata */
module_init(ip_vs_mh_init);
module_exit(ip_vs_mh_cleanup);
MODULE_DESCRIPTION("Maglev hashing ipvs scheduler");
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("Inju Song <inju.song@navercorp.com>");

View File

@ -436,7 +436,7 @@ static bool tcp_state_active(int state)
return tcp_state_active_table[state]; return tcp_state_active_table[state];
} }
static struct tcp_states_t tcp_states [] = { static struct tcp_states_t tcp_states[] = {
/* INPUT */ /* INPUT */
/* sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA */ /* sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA */
/*syn*/ {{sSR, sES, sES, sSR, sSR, sSR, sSR, sSR, sSR, sSR, sSR }}, /*syn*/ {{sSR, sES, sES, sSR, sSR, sSR, sSR, sSR, sSR, sSR, sSR }},
@ -459,7 +459,7 @@ static struct tcp_states_t tcp_states [] = {
/*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sCL }}, /*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sCL }},
}; };
static struct tcp_states_t tcp_states_dos [] = { static struct tcp_states_t tcp_states_dos[] = {
/* INPUT */ /* INPUT */
/* sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA */ /* sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA */
/*syn*/ {{sSR, sES, sES, sSR, sSR, sSR, sSR, sSR, sSR, sSR, sSA }}, /*syn*/ {{sSR, sES, sES, sSR, sSR, sSR, sSR, sSR, sSR, sSR, sSA }},

View File

@ -96,7 +96,8 @@ ip_vs_sh_hashkey(int af, const union nf_inet_addr *addr,
addr_fold = addr->ip6[0]^addr->ip6[1]^ addr_fold = addr->ip6[0]^addr->ip6[1]^
addr->ip6[2]^addr->ip6[3]; addr->ip6[2]^addr->ip6[3];
#endif #endif
return (offset + (ntohs(port) + ntohl(addr_fold))*2654435761UL) & return (offset + hash_32(ntohs(port) + ntohl(addr_fold),
IP_VS_SH_TAB_BITS)) &
IP_VS_SH_TAB_MASK; IP_VS_SH_TAB_MASK;
} }