#ifndef __CEPH_FEATURES
#define __CEPH_FEATURES

/*
 * feature bits
 *
 * Every CEPH_FEATURE_* macro below is a distinct single-bit mask
 * (bits 0 through 29), so individual features can be combined with
 * bitwise OR and tested with bitwise AND.
 */
#define CEPH_FEATURE_UID                  (1<<0)
#define CEPH_FEATURE_NOSRCADDR            (1<<1)
#define CEPH_FEATURE_MONCLOCKCHECK        (1<<2)
#define CEPH_FEATURE_FLOCK                (1<<3)
#define CEPH_FEATURE_SUBSCRIBE2           (1<<4)
#define CEPH_FEATURE_MONNAMES             (1<<5)
#define CEPH_FEATURE_RECONNECT_SEQ        (1<<6)
#define CEPH_FEATURE_DIRLAYOUTHASH        (1<<7)
#define CEPH_FEATURE_OBJECTLOCATOR        (1<<8)
#define CEPH_FEATURE_PGID64               (1<<9)
#define CEPH_FEATURE_INCSUBOSDMAP         (1<<10)
#define CEPH_FEATURE_PGPOOL3              (1<<11)
#define CEPH_FEATURE_OSDREPLYMUX          (1<<12)
#define CEPH_FEATURE_OSDENC               (1<<13)
#define CEPH_FEATURE_OMAP                 (1<<14)
#define CEPH_FEATURE_MONENC               (1<<15)
#define CEPH_FEATURE_QUERY_T              (1<<16)
#define CEPH_FEATURE_INDEP_PG_MAP         (1<<17)
#define CEPH_FEATURE_CRUSH_TUNABLES       (1<<18)
#define CEPH_FEATURE_CHUNKY_SCRUB         (1<<19)
#define CEPH_FEATURE_MON_NULLROUTE        (1<<20)
#define CEPH_FEATURE_MON_GV               (1<<21)
#define CEPH_FEATURE_BACKFILL_RESERVATION (1<<22)
#define CEPH_FEATURE_MSG_AUTH             (1<<23)
#define CEPH_FEATURE_RECOVERY_RESERVATION (1<<24)
/*
 * Introduced by the libceph change "for chooseleaf rules, retry CRUSH
 * map descent from root if leaf is failed", which wires up a new CRUSH
 * tunable (corresponds to ceph.git commit
 * 88f218181a9e6d2292e2697fc93797d0f6d6e5dc).
 */
#define CEPH_FEATURE_CRUSH_TUNABLES2      (1<<25)
#define CEPH_FEATURE_CREATEPOOLID         (1<<26)
#define CEPH_FEATURE_REPLY_CREATE_INODE   (1<<27)
#define CEPH_FEATURE_OSD_HBMSGS           (1<<28)
#define CEPH_FEATURE_MDSENC               (1<<29)

/*
 * Features supported.
 *
 * Default mask of supported features: the bitwise OR of the feature
 * bits listed below.
 */
#define CEPH_FEATURES_SUPPORTED_DEFAULT  \
	(CEPH_FEATURE_NOSRCADDR |	 \
	 CEPH_FEATURE_PGID64 |		 \
	 CEPH_FEATURE_PGPOOL3 |		 \
	 CEPH_FEATURE_OSDENC |		 \
	 CEPH_FEATURE_CRUSH_TUNABLES |	 \
	 CEPH_FEATURE_CRUSH_TUNABLES2 |	 \
	 CEPH_FEATURE_REPLY_CREATE_INODE)

/*
 * Default mask of required features.  Every bit set here is also set
 * in CEPH_FEATURES_SUPPORTED_DEFAULT.
 */
#define CEPH_FEATURES_REQUIRED_DEFAULT   \
	(CEPH_FEATURE_NOSRCADDR |	 \
	 CEPH_FEATURE_PGID64 |		 \
	 CEPH_FEATURE_PGPOOL3 |		 \
	 CEPH_FEATURE_OSDENC)

#endif