block-5.7-2020-05-09
-----BEGIN PGP SIGNATURE----- iQJEBAABCAAuFiEEwPw5LcreJtl1+l5K99NY+ylx4KYFAl63WVAQHGF4Ym9lQGtl cm5lbC5kawAKCRD301j7KXHgpkXWD/9qJgqQpPkigCCwwPHZ+phthw6gHeAgBxPH Cw6P9QB4QCdacZjQA6QH3zdxaDsCCitQRioWPgxngs1326TKYNzBi7U3eTEwiK12 cnRybLnkzei4yzYVUSJk637oOoQh3CiJLvYcJBppGFi7crpbvlQv68M2hu05vhwL R/91H62X/5UaUlc1cJV63OBk8euWzF6XNbCQQrR4ayDvz+BsV5Fs72vYa1gx7qIt as/67oTT6y4U4pd74nT4OGkxDIXbXfn2eTbh5sMNc4ilBkqMyNbf8aOHdWqXZIBd 18RKpNl6h/fiDMJ0jsGliReONLjfRBcJla68Kn1AFONMcyxcXidjptOwLOt2fYWf YMguCVMhfgxVBslzLWoQ9AWSiNVh36ycORWlCOrnRaOaQCb9OaLZ2fwibfZ0JsMd 0259Z5vA7MIUoobCc5akXOYHbpByA9FSYkKudgTYLpdjkn05kxQyA12GgJjW3sVw ZRjoUuDuZDDUct6JcLWdrlONT8st05g+qf6PCoD+Jac8HtbpqHfKJJUtYecUat75 4hGKhuvTzpuVY0wNHo3sgqKfsejQODTN6UhejNI11Zs/nx6O0ze/qoDuWZHncnKl 158le+K5rNS8SUNbDBTMWp3OX4SJm/Gsf30fOWkkt6z1iaEfKc5sCxBHvSOeBEvH M9pzy56Vtw== =73nU -----END PGP SIGNATURE----- Merge tag 'block-5.7-2020-05-09' of git://git.kernel.dk/linux-block Pull block fixes from Jens Axboe: - a small series fixing a use-after-free of bdi name (Christoph,Yufen) - NVMe fix for a regression with the smaller CQ update (Alexey) - NVMe fix for a hang at namespace scanning error recovery (Sagi) - fix race with blk-iocost iocg->abs_vdebt updates (Tejun) * tag 'block-5.7-2020-05-09' of git://git.kernel.dk/linux-block: nvme: fix possible hang when ns scanning fails during error recovery nvme-pci: fix "slimmer CQ head update" bdi: add a ->dev_name field to struct backing_dev_info bdi: use bdi_dev_name() to get device name bdi: move bdi_dev_name out of line vboxsf: don't use the source name in the bdi name iocost: protect iocg->abs_vdebt with iocg->waitq.lock
This commit is contained in:
commit
0a85ed6e7f
|
@ -123,6 +123,7 @@
|
|||
#include <linux/ioprio.h>
|
||||
#include <linux/sbitmap.h>
|
||||
#include <linux/delay.h>
|
||||
#include <linux/backing-dev.h>
|
||||
|
||||
#include "blk.h"
|
||||
#include "blk-mq.h"
|
||||
|
@ -4976,8 +4977,9 @@ bfq_set_next_ioprio_data(struct bfq_queue *bfqq, struct bfq_io_cq *bic)
|
|||
ioprio_class = IOPRIO_PRIO_CLASS(bic->ioprio);
|
||||
switch (ioprio_class) {
|
||||
default:
|
||||
dev_err(bfqq->bfqd->queue->backing_dev_info->dev,
|
||||
"bfq: bad prio class %d\n", ioprio_class);
|
||||
pr_err("bdi %s: bfq: bad prio class %d\n",
|
||||
bdi_dev_name(bfqq->bfqd->queue->backing_dev_info),
|
||||
ioprio_class);
|
||||
/* fall through */
|
||||
case IOPRIO_CLASS_NONE:
|
||||
/*
|
||||
|
|
|
@ -496,7 +496,7 @@ const char *blkg_dev_name(struct blkcg_gq *blkg)
|
|||
{
|
||||
/* some drivers (floppy) instantiate a queue w/o disk registered */
|
||||
if (blkg->q->backing_dev_info->dev)
|
||||
return dev_name(blkg->q->backing_dev_info->dev);
|
||||
return bdi_dev_name(blkg->q->backing_dev_info);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
|
|
@ -466,7 +466,7 @@ struct ioc_gq {
|
|||
*/
|
||||
atomic64_t vtime;
|
||||
atomic64_t done_vtime;
|
||||
atomic64_t abs_vdebt;
|
||||
u64 abs_vdebt;
|
||||
u64 last_vtime;
|
||||
|
||||
/*
|
||||
|
@ -1142,7 +1142,7 @@ static void iocg_kick_waitq(struct ioc_gq *iocg, struct ioc_now *now)
|
|||
struct iocg_wake_ctx ctx = { .iocg = iocg };
|
||||
u64 margin_ns = (u64)(ioc->period_us *
|
||||
WAITQ_TIMER_MARGIN_PCT / 100) * NSEC_PER_USEC;
|
||||
u64 abs_vdebt, vdebt, vshortage, expires, oexpires;
|
||||
u64 vdebt, vshortage, expires, oexpires;
|
||||
s64 vbudget;
|
||||
u32 hw_inuse;
|
||||
|
||||
|
@ -1152,18 +1152,15 @@ static void iocg_kick_waitq(struct ioc_gq *iocg, struct ioc_now *now)
|
|||
vbudget = now->vnow - atomic64_read(&iocg->vtime);
|
||||
|
||||
/* pay off debt */
|
||||
abs_vdebt = atomic64_read(&iocg->abs_vdebt);
|
||||
vdebt = abs_cost_to_cost(abs_vdebt, hw_inuse);
|
||||
vdebt = abs_cost_to_cost(iocg->abs_vdebt, hw_inuse);
|
||||
if (vdebt && vbudget > 0) {
|
||||
u64 delta = min_t(u64, vbudget, vdebt);
|
||||
u64 abs_delta = min(cost_to_abs_cost(delta, hw_inuse),
|
||||
abs_vdebt);
|
||||
iocg->abs_vdebt);
|
||||
|
||||
atomic64_add(delta, &iocg->vtime);
|
||||
atomic64_add(delta, &iocg->done_vtime);
|
||||
atomic64_sub(abs_delta, &iocg->abs_vdebt);
|
||||
if (WARN_ON_ONCE(atomic64_read(&iocg->abs_vdebt) < 0))
|
||||
atomic64_set(&iocg->abs_vdebt, 0);
|
||||
iocg->abs_vdebt -= abs_delta;
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -1219,12 +1216,18 @@ static bool iocg_kick_delay(struct ioc_gq *iocg, struct ioc_now *now, u64 cost)
|
|||
u64 expires, oexpires;
|
||||
u32 hw_inuse;
|
||||
|
||||
lockdep_assert_held(&iocg->waitq.lock);
|
||||
|
||||
/* debt-adjust vtime */
|
||||
current_hweight(iocg, NULL, &hw_inuse);
|
||||
vtime += abs_cost_to_cost(atomic64_read(&iocg->abs_vdebt), hw_inuse);
|
||||
vtime += abs_cost_to_cost(iocg->abs_vdebt, hw_inuse);
|
||||
|
||||
/* clear or maintain depending on the overage */
|
||||
if (time_before_eq64(vtime, now->vnow)) {
|
||||
/*
|
||||
* Clear or maintain depending on the overage. Non-zero vdebt is what
|
||||
* guarantees that @iocg is online and future iocg_kick_delay() will
|
||||
* clear use_delay. Don't leave it on when there's no vdebt.
|
||||
*/
|
||||
if (!iocg->abs_vdebt || time_before_eq64(vtime, now->vnow)) {
|
||||
blkcg_clear_delay(blkg);
|
||||
return false;
|
||||
}
|
||||
|
@ -1258,9 +1261,12 @@ static enum hrtimer_restart iocg_delay_timer_fn(struct hrtimer *timer)
|
|||
{
|
||||
struct ioc_gq *iocg = container_of(timer, struct ioc_gq, delay_timer);
|
||||
struct ioc_now now;
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(&iocg->waitq.lock, flags);
|
||||
ioc_now(iocg->ioc, &now);
|
||||
iocg_kick_delay(iocg, &now, 0);
|
||||
spin_unlock_irqrestore(&iocg->waitq.lock, flags);
|
||||
|
||||
return HRTIMER_NORESTART;
|
||||
}
|
||||
|
@ -1368,14 +1374,13 @@ static void ioc_timer_fn(struct timer_list *timer)
|
|||
* should have woken up in the last period and expire idle iocgs.
|
||||
*/
|
||||
list_for_each_entry_safe(iocg, tiocg, &ioc->active_iocgs, active_list) {
|
||||
if (!waitqueue_active(&iocg->waitq) &&
|
||||
!atomic64_read(&iocg->abs_vdebt) && !iocg_is_idle(iocg))
|
||||
if (!waitqueue_active(&iocg->waitq) && iocg->abs_vdebt &&
|
||||
!iocg_is_idle(iocg))
|
||||
continue;
|
||||
|
||||
spin_lock(&iocg->waitq.lock);
|
||||
|
||||
if (waitqueue_active(&iocg->waitq) ||
|
||||
atomic64_read(&iocg->abs_vdebt)) {
|
||||
if (waitqueue_active(&iocg->waitq) || iocg->abs_vdebt) {
|
||||
/* might be oversleeping vtime / hweight changes, kick */
|
||||
iocg_kick_waitq(iocg, &now);
|
||||
iocg_kick_delay(iocg, &now, 0);
|
||||
|
@ -1718,28 +1723,49 @@ static void ioc_rqos_throttle(struct rq_qos *rqos, struct bio *bio)
|
|||
* tests are racy but the races aren't systemic - we only miss once
|
||||
* in a while which is fine.
|
||||
*/
|
||||
if (!waitqueue_active(&iocg->waitq) &&
|
||||
!atomic64_read(&iocg->abs_vdebt) &&
|
||||
if (!waitqueue_active(&iocg->waitq) && !iocg->abs_vdebt &&
|
||||
time_before_eq64(vtime + cost, now.vnow)) {
|
||||
iocg_commit_bio(iocg, bio, cost);
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* We're over budget. If @bio has to be issued regardless,
|
||||
* remember the abs_cost instead of advancing vtime.
|
||||
* iocg_kick_waitq() will pay off the debt before waking more IOs.
|
||||
* We activated above but w/o any synchronization. Deactivation is
|
||||
* synchronized with waitq.lock and we won't get deactivated as long
|
||||
* as we're waiting or has debt, so we're good if we're activated
|
||||
* here. In the unlikely case that we aren't, just issue the IO.
|
||||
*/
|
||||
spin_lock_irq(&iocg->waitq.lock);
|
||||
|
||||
if (unlikely(list_empty(&iocg->active_list))) {
|
||||
spin_unlock_irq(&iocg->waitq.lock);
|
||||
iocg_commit_bio(iocg, bio, cost);
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* We're over budget. If @bio has to be issued regardless, remember
|
||||
* the abs_cost instead of advancing vtime. iocg_kick_waitq() will pay
|
||||
* off the debt before waking more IOs.
|
||||
*
|
||||
* This way, the debt is continuously paid off each period with the
|
||||
* actual budget available to the cgroup. If we just wound vtime,
|
||||
* we would incorrectly use the current hw_inuse for the entire
|
||||
* amount which, for example, can lead to the cgroup staying
|
||||
* blocked for a long time even with substantially raised hw_inuse.
|
||||
* actual budget available to the cgroup. If we just wound vtime, we
|
||||
* would incorrectly use the current hw_inuse for the entire amount
|
||||
* which, for example, can lead to the cgroup staying blocked for a
|
||||
* long time even with substantially raised hw_inuse.
|
||||
*
|
||||
* An iocg with vdebt should stay online so that the timer can keep
|
||||
* deducting its vdebt and [de]activate use_delay mechanism
|
||||
* accordingly. We don't want to race against the timer trying to
|
||||
* clear them and leave @iocg inactive w/ dangling use_delay heavily
|
||||
* penalizing the cgroup and its descendants.
|
||||
*/
|
||||
if (bio_issue_as_root_blkg(bio) || fatal_signal_pending(current)) {
|
||||
atomic64_add(abs_cost, &iocg->abs_vdebt);
|
||||
iocg->abs_vdebt += abs_cost;
|
||||
if (iocg_kick_delay(iocg, &now, cost))
|
||||
blkcg_schedule_throttle(rqos->q,
|
||||
(bio->bi_opf & REQ_SWAP) == REQ_SWAP);
|
||||
spin_unlock_irq(&iocg->waitq.lock);
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -1756,20 +1782,6 @@ static void ioc_rqos_throttle(struct rq_qos *rqos, struct bio *bio)
|
|||
* All waiters are on iocg->waitq and the wait states are
|
||||
* synchronized using waitq.lock.
|
||||
*/
|
||||
spin_lock_irq(&iocg->waitq.lock);
|
||||
|
||||
/*
|
||||
* We activated above but w/o any synchronization. Deactivation is
|
||||
* synchronized with waitq.lock and we won't get deactivated as
|
||||
* long as we're waiting, so we're good if we're activated here.
|
||||
* In the unlikely case that we are deactivated, just issue the IO.
|
||||
*/
|
||||
if (unlikely(list_empty(&iocg->active_list))) {
|
||||
spin_unlock_irq(&iocg->waitq.lock);
|
||||
iocg_commit_bio(iocg, bio, cost);
|
||||
return;
|
||||
}
|
||||
|
||||
init_waitqueue_func_entry(&wait.wait, iocg_wake_fn);
|
||||
wait.wait.private = current;
|
||||
wait.bio = bio;
|
||||
|
@ -1801,6 +1813,7 @@ static void ioc_rqos_merge(struct rq_qos *rqos, struct request *rq,
|
|||
struct ioc_now now;
|
||||
u32 hw_inuse;
|
||||
u64 abs_cost, cost;
|
||||
unsigned long flags;
|
||||
|
||||
/* bypass if disabled or for root cgroup */
|
||||
if (!ioc->enabled || !iocg->level)
|
||||
|
@ -1820,15 +1833,28 @@ static void ioc_rqos_merge(struct rq_qos *rqos, struct request *rq,
|
|||
iocg->cursor = bio_end;
|
||||
|
||||
/*
|
||||
* Charge if there's enough vtime budget and the existing request
|
||||
* has cost assigned. Otherwise, account it as debt. See debt
|
||||
* handling in ioc_rqos_throttle() for details.
|
||||
* Charge if there's enough vtime budget and the existing request has
|
||||
* cost assigned.
|
||||
*/
|
||||
if (rq->bio && rq->bio->bi_iocost_cost &&
|
||||
time_before_eq64(atomic64_read(&iocg->vtime) + cost, now.vnow))
|
||||
time_before_eq64(atomic64_read(&iocg->vtime) + cost, now.vnow)) {
|
||||
iocg_commit_bio(iocg, bio, cost);
|
||||
else
|
||||
atomic64_add(abs_cost, &iocg->abs_vdebt);
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* Otherwise, account it as debt if @iocg is online, which it should
|
||||
* be for the vast majority of cases. See debt handling in
|
||||
* ioc_rqos_throttle() for details.
|
||||
*/
|
||||
spin_lock_irqsave(&iocg->waitq.lock, flags);
|
||||
if (likely(!list_empty(&iocg->active_list))) {
|
||||
iocg->abs_vdebt += abs_cost;
|
||||
iocg_kick_delay(iocg, &now, cost);
|
||||
} else {
|
||||
iocg_commit_bio(iocg, bio, cost);
|
||||
}
|
||||
spin_unlock_irqrestore(&iocg->waitq.lock, flags);
|
||||
}
|
||||
|
||||
static void ioc_rqos_done_bio(struct rq_qos *rqos, struct bio *bio)
|
||||
|
@ -1998,7 +2024,6 @@ static void ioc_pd_init(struct blkg_policy_data *pd)
|
|||
iocg->ioc = ioc;
|
||||
atomic64_set(&iocg->vtime, now.vnow);
|
||||
atomic64_set(&iocg->done_vtime, now.vnow);
|
||||
atomic64_set(&iocg->abs_vdebt, 0);
|
||||
atomic64_set(&iocg->active_period, atomic64_read(&ioc->cur_period));
|
||||
INIT_LIST_HEAD(&iocg->active_list);
|
||||
iocg->hweight_active = HWEIGHT_WHOLE;
|
||||
|
|
|
@ -1110,7 +1110,7 @@ static int nvme_identify_ns_descs(struct nvme_ctrl *ctrl, unsigned nsid,
|
|||
* Don't treat an error as fatal, as we potentially already
|
||||
* have a NGUID or EUI-64.
|
||||
*/
|
||||
if (status > 0)
|
||||
if (status > 0 && !(status & NVME_SC_DNR))
|
||||
status = 0;
|
||||
goto free_data;
|
||||
}
|
||||
|
|
|
@ -973,9 +973,13 @@ static inline void nvme_handle_cqe(struct nvme_queue *nvmeq, u16 idx)
|
|||
|
||||
static inline void nvme_update_cq_head(struct nvme_queue *nvmeq)
|
||||
{
|
||||
if (++nvmeq->cq_head == nvmeq->q_depth) {
|
||||
u16 tmp = nvmeq->cq_head + 1;
|
||||
|
||||
if (tmp == nvmeq->q_depth) {
|
||||
nvmeq->cq_head = 0;
|
||||
nvmeq->cq_phase ^= 1;
|
||||
} else {
|
||||
nvmeq->cq_head = tmp;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -271,7 +271,7 @@ void ceph_fs_debugfs_init(struct ceph_fs_client *fsc)
|
|||
&congestion_kb_fops);
|
||||
|
||||
snprintf(name, sizeof(name), "../../bdi/%s",
|
||||
dev_name(fsc->sb->s_bdi->dev));
|
||||
bdi_dev_name(fsc->sb->s_bdi));
|
||||
fsc->debugfs_bdi =
|
||||
debugfs_create_symlink("bdi",
|
||||
fsc->client->debugfs_dir,
|
||||
|
|
|
@ -164,7 +164,7 @@ static int vboxsf_fill_super(struct super_block *sb, struct fs_context *fc)
|
|||
goto fail_free;
|
||||
}
|
||||
|
||||
err = super_setup_bdi_name(sb, "vboxsf-%s.%d", fc->source, sbi->bdi_id);
|
||||
err = super_setup_bdi_name(sb, "vboxsf-%d", sbi->bdi_id);
|
||||
if (err)
|
||||
goto fail_free;
|
||||
|
||||
|
|
|
@ -219,6 +219,7 @@ struct backing_dev_info {
|
|||
wait_queue_head_t wb_waitq;
|
||||
|
||||
struct device *dev;
|
||||
char dev_name[64];
|
||||
struct device *owner;
|
||||
|
||||
struct timer_list laptop_mode_wb_timer;
|
||||
|
|
|
@ -505,13 +505,6 @@ static inline int bdi_rw_congested(struct backing_dev_info *bdi)
|
|||
(1 << WB_async_congested));
|
||||
}
|
||||
|
||||
extern const char *bdi_unknown_name;
|
||||
|
||||
static inline const char *bdi_dev_name(struct backing_dev_info *bdi)
|
||||
{
|
||||
if (!bdi || !bdi->dev)
|
||||
return bdi_unknown_name;
|
||||
return dev_name(bdi->dev);
|
||||
}
|
||||
const char *bdi_dev_name(struct backing_dev_info *bdi);
|
||||
|
||||
#endif /* _LINUX_BACKING_DEV_H */
|
||||
|
|
|
@ -33,7 +33,7 @@ TRACE_EVENT(wbt_stat,
|
|||
),
|
||||
|
||||
TP_fast_assign(
|
||||
strlcpy(__entry->name, dev_name(bdi->dev),
|
||||
strlcpy(__entry->name, bdi_dev_name(bdi),
|
||||
ARRAY_SIZE(__entry->name));
|
||||
__entry->rmean = stat[0].mean;
|
||||
__entry->rmin = stat[0].min;
|
||||
|
@ -68,7 +68,7 @@ TRACE_EVENT(wbt_lat,
|
|||
),
|
||||
|
||||
TP_fast_assign(
|
||||
strlcpy(__entry->name, dev_name(bdi->dev),
|
||||
strlcpy(__entry->name, bdi_dev_name(bdi),
|
||||
ARRAY_SIZE(__entry->name));
|
||||
__entry->lat = div_u64(lat, 1000);
|
||||
),
|
||||
|
@ -105,7 +105,7 @@ TRACE_EVENT(wbt_step,
|
|||
),
|
||||
|
||||
TP_fast_assign(
|
||||
strlcpy(__entry->name, dev_name(bdi->dev),
|
||||
strlcpy(__entry->name, bdi_dev_name(bdi),
|
||||
ARRAY_SIZE(__entry->name));
|
||||
__entry->msg = msg;
|
||||
__entry->step = step;
|
||||
|
@ -141,7 +141,7 @@ TRACE_EVENT(wbt_timer,
|
|||
),
|
||||
|
||||
TP_fast_assign(
|
||||
strlcpy(__entry->name, dev_name(bdi->dev),
|
||||
strlcpy(__entry->name, bdi_dev_name(bdi),
|
||||
ARRAY_SIZE(__entry->name));
|
||||
__entry->status = status;
|
||||
__entry->step = step;
|
||||
|
|
|
@ -21,7 +21,7 @@ struct backing_dev_info noop_backing_dev_info = {
|
|||
EXPORT_SYMBOL_GPL(noop_backing_dev_info);
|
||||
|
||||
static struct class *bdi_class;
|
||||
const char *bdi_unknown_name = "(unknown)";
|
||||
static const char *bdi_unknown_name = "(unknown)";
|
||||
|
||||
/*
|
||||
* bdi_lock protects bdi_tree and updates to bdi_list. bdi_list has RCU
|
||||
|
@ -938,7 +938,8 @@ int bdi_register_va(struct backing_dev_info *bdi, const char *fmt, va_list args)
|
|||
if (bdi->dev) /* The driver needs to use separate queues per device */
|
||||
return 0;
|
||||
|
||||
dev = device_create_vargs(bdi_class, NULL, MKDEV(0, 0), bdi, fmt, args);
|
||||
vsnprintf(bdi->dev_name, sizeof(bdi->dev_name), fmt, args);
|
||||
dev = device_create(bdi_class, NULL, MKDEV(0, 0), bdi, bdi->dev_name);
|
||||
if (IS_ERR(dev))
|
||||
return PTR_ERR(dev);
|
||||
|
||||
|
@ -1043,6 +1044,14 @@ void bdi_put(struct backing_dev_info *bdi)
|
|||
}
|
||||
EXPORT_SYMBOL(bdi_put);
|
||||
|
||||
const char *bdi_dev_name(struct backing_dev_info *bdi)
|
||||
{
|
||||
if (!bdi || !bdi->dev)
|
||||
return bdi_unknown_name;
|
||||
return bdi->dev_name;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(bdi_dev_name);
|
||||
|
||||
static wait_queue_head_t congestion_wqh[2] = {
|
||||
__WAIT_QUEUE_HEAD_INITIALIZER(congestion_wqh[0]),
|
||||
__WAIT_QUEUE_HEAD_INITIALIZER(congestion_wqh[1])
|
||||
|
|
|
@ -159,7 +159,12 @@ class IocgStat:
|
|||
else:
|
||||
self.inflight_pct = 0
|
||||
|
||||
self.debt_ms = iocg.abs_vdebt.counter.value_() / VTIME_PER_USEC / 1000
|
||||
# vdebt used to be an atomic64_t and is now u64, support both
|
||||
try:
|
||||
self.debt_ms = iocg.abs_vdebt.counter.value_() / VTIME_PER_USEC / 1000
|
||||
except:
|
||||
self.debt_ms = iocg.abs_vdebt.value_() / VTIME_PER_USEC / 1000
|
||||
|
||||
self.use_delay = blkg.use_delay.counter.value_()
|
||||
self.delay_ms = blkg.delay_nsec.counter.value_() / 1_000_000
|
||||
|
||||
|
|
Loading…
Reference in New Issue