commit 798ce8f1cc

Merge branch 'for-2.6.40/core' of git://git.kernel.dk/linux-2.6-block

* 'for-2.6.40/core' of git://git.kernel.dk/linux-2.6-block: (40 commits)
  cfq-iosched: free cic_index if cfqd allocation fails
  cfq-iosched: remove unused 'group_changed' in cfq_service_tree_add()
  cfq-iosched: reduce bit operations in cfq_choose_req()
  cfq-iosched: algebraic simplification in cfq_prio_to_maxrq()
  blk-cgroup: Initialize ioc->cgroup_changed at ioc creation time
  block: move bd_set_size() above rescan_partitions() in __blkdev_get()
  block: call elv_bio_merged() when merged
  cfq-iosched: Make IO merge related stats per cpu
  cfq-iosched: Fix a memory leak of per cpu stats for root group
  backing-dev: Kill set but not used var in bdi_debug_stats_show()
  block: get rid of on-stack plugging debug checks
  blk-throttle: Make no throttling rule group processing lockless
  blk-cgroup: Make cgroup stat reset path blkg->lock free for dispatch stats
  blk-cgroup: Make 64bit per cpu stats safe on 32bit arch
  blk-throttle: Make dispatch stats per cpu
  blk-throttle: Free up a group only after one rcu grace period
  blk-throttle: Use helper function to add root throtl group to lists
  blk-throttle: Introduce a helper function to fill in device details
  blk-throttle: Dynamically allocate root group
  blk-cgroup: Allow sleeping while dynamically allocating a group
  ...
@@ -142,3 +142,67 @@ Description:
 		with the previous I/O request are enabled. When set to 2,
 		all merge tries are disabled. The default value is 0 -
 		which enables all types of merge tries.
+
+What:		/sys/block/<disk>/discard_alignment
+Date:		May 2011
+Contact:	Martin K. Petersen <martin.petersen@oracle.com>
+Description:
+		Devices that support discard functionality may
+		internally allocate space in units that are bigger than
+		the exported logical block size. The discard_alignment
+		parameter indicates how many bytes the beginning of the
+		device is offset from the internal allocation unit's
+		natural alignment.
+
+What:		/sys/block/<disk>/<partition>/discard_alignment
+Date:		May 2011
+Contact:	Martin K. Petersen <martin.petersen@oracle.com>
+Description:
+		Devices that support discard functionality may
+		internally allocate space in units that are bigger than
+		the exported logical block size. The discard_alignment
+		parameter indicates how many bytes the beginning of the
+		partition is offset from the internal allocation unit's
+		natural alignment.
+
+What:		/sys/block/<disk>/queue/discard_granularity
+Date:		May 2011
+Contact:	Martin K. Petersen <martin.petersen@oracle.com>
+Description:
+		Devices that support discard functionality may
+		internally allocate space using units that are bigger
+		than the logical block size. The discard_granularity
+		parameter indicates the size of the internal allocation
+		unit in bytes if reported by the device. Otherwise the
+		discard_granularity will be set to match the device's
+		physical block size. A discard_granularity of 0 means
+		that the device does not support discard functionality.
+
+What:		/sys/block/<disk>/queue/discard_max_bytes
+Date:		May 2011
+Contact:	Martin K. Petersen <martin.petersen@oracle.com>
+Description:
+		Devices that support discard functionality may have
+		internal limits on the number of bytes that can be
+		trimmed or unmapped in a single operation. Some storage
+		protocols also have inherent limits on the number of
+		blocks that can be described in a single command. The
+		discard_max_bytes parameter is set by the device driver
+		to the maximum number of bytes that can be discarded in
+		a single operation. Discard requests issued to the
+		device must not exceed this limit. A discard_max_bytes
+		value of 0 means that the device does not support
+		discard functionality.
+
+What:		/sys/block/<disk>/queue/discard_zeroes_data
+Date:		May 2011
+Contact:	Martin K. Petersen <martin.petersen@oracle.com>
+Description:
+		Devices that support discard functionality may return
+		stale or random data when a previously discarded block
+		is read back. This can cause problems if the filesystem
+		expects discarded blocks to be explicitly cleared. If a
+		device reports that it deterministically returns zeroes
+		when a discarded area is read the discard_zeroes_data
+		parameter will be set to one. Otherwise it will be 0 and
+		the result of reading a discarded area is undefined.
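Note: the discard attributes documented above are plain text files under sysfs. A minimal userspace sketch that reads them is shown below for reference; the disk name "sda" and the helper read_queue_attr() are assumptions made for illustration and are not part of this change.

/* Hypothetical userspace helper: read a queue's discard limits from sysfs. */
#include <stdio.h>

static long read_queue_attr(const char *disk, const char *attr)
{
	char path[256];
	long val = -1;
	FILE *f;

	/* e.g. /sys/block/sda/queue/discard_granularity */
	snprintf(path, sizeof(path), "/sys/block/%s/queue/%s", disk, attr);
	f = fopen(path, "r");
	if (!f)
		return -1;
	if (fscanf(f, "%ld", &val) != 1)
		val = -1;
	fclose(f);
	return val;
}

int main(void)
{
	printf("discard_granularity: %ld\n", read_queue_attr("sda", "discard_granularity"));
	printf("discard_max_bytes:   %ld\n", read_queue_attr("sda", "discard_max_bytes"));
	printf("discard_zeroes_data: %ld\n", read_queue_attr("sda", "discard_zeroes_data"));
	return 0;
}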
@@ -385,25 +385,40 @@ void blkiocg_update_timeslice_used(struct blkio_group *blkg, unsigned long time,
 	spin_lock_irqsave(&blkg->stats_lock, flags);
 	blkg->stats.time += time;
+#ifdef CONFIG_DEBUG_BLK_CGROUP
 	blkg->stats.unaccounted_time += unaccounted_time;
+#endif
 	spin_unlock_irqrestore(&blkg->stats_lock, flags);
 }
 EXPORT_SYMBOL_GPL(blkiocg_update_timeslice_used);
 
+/*
+ * should be called under rcu read lock or queue lock to make sure blkg pointer
+ * is valid.
+ */
 void blkiocg_update_dispatch_stats(struct blkio_group *blkg,
 				uint64_t bytes, bool direction, bool sync)
 {
-	struct blkio_group_stats *stats;
+	struct blkio_group_stats_cpu *stats_cpu;
 	unsigned long flags;
 
-	spin_lock_irqsave(&blkg->stats_lock, flags);
-	stats = &blkg->stats;
-	stats->sectors += bytes >> 9;
-	blkio_add_stat(stats->stat_arr[BLKIO_STAT_SERVICED], 1, direction,
-			sync);
-	blkio_add_stat(stats->stat_arr[BLKIO_STAT_SERVICE_BYTES], bytes,
-			direction, sync);
-	spin_unlock_irqrestore(&blkg->stats_lock, flags);
+	/*
+	 * Disabling interrupts to provide mutual exclusion between two
+	 * writes on same cpu. It probably is not needed for 64bit. Not
+	 * optimizing that case yet.
+	 */
+	local_irq_save(flags);
+
+	stats_cpu = this_cpu_ptr(blkg->stats_cpu);
+
+	u64_stats_update_begin(&stats_cpu->syncp);
+	stats_cpu->sectors += bytes >> 9;
+	blkio_add_stat(stats_cpu->stat_arr_cpu[BLKIO_STAT_CPU_SERVICED],
+			1, direction, sync);
+	blkio_add_stat(stats_cpu->stat_arr_cpu[BLKIO_STAT_CPU_SERVICE_BYTES],
+			bytes, direction, sync);
+	u64_stats_update_end(&stats_cpu->syncp);
+	local_irq_restore(flags);
 }
 EXPORT_SYMBOL_GPL(blkiocg_update_dispatch_stats);
 
@@ -426,18 +441,44 @@ void blkiocg_update_completion_stats(struct blkio_group *blkg,
 }
 EXPORT_SYMBOL_GPL(blkiocg_update_completion_stats);
 
+/* Merged stats are per cpu. */
 void blkiocg_update_io_merged_stats(struct blkio_group *blkg, bool direction,
 					bool sync)
 {
+	struct blkio_group_stats_cpu *stats_cpu;
 	unsigned long flags;
 
-	spin_lock_irqsave(&blkg->stats_lock, flags);
-	blkio_add_stat(blkg->stats.stat_arr[BLKIO_STAT_MERGED], 1, direction,
-			sync);
-	spin_unlock_irqrestore(&blkg->stats_lock, flags);
+	/*
+	 * Disabling interrupts to provide mutual exclusion between two
+	 * writes on same cpu. It probably is not needed for 64bit. Not
+	 * optimizing that case yet.
+	 */
+	local_irq_save(flags);
+
+	stats_cpu = this_cpu_ptr(blkg->stats_cpu);
+
+	u64_stats_update_begin(&stats_cpu->syncp);
+	blkio_add_stat(stats_cpu->stat_arr_cpu[BLKIO_STAT_CPU_MERGED], 1,
+				direction, sync);
+	u64_stats_update_end(&stats_cpu->syncp);
+	local_irq_restore(flags);
 }
 EXPORT_SYMBOL_GPL(blkiocg_update_io_merged_stats);
 
+/*
+ * This function allocates the per cpu stats for blkio_group. Should be called
+ * from sleepable context as alloc_per_cpu() requires that.
+ */
+int blkio_alloc_blkg_stats(struct blkio_group *blkg)
+{
+	/* Allocate memory for per cpu stats */
+	blkg->stats_cpu = alloc_percpu(struct blkio_group_stats_cpu);
+	if (!blkg->stats_cpu)
+		return -ENOMEM;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(blkio_alloc_blkg_stats);
+
 void blkiocg_add_blkio_group(struct blkio_cgroup *blkcg,
 		struct blkio_group *blkg, void *key, dev_t dev,
 		enum blkio_policy_id plid)
@@ -508,6 +549,30 @@ struct blkio_group *blkiocg_lookup_group(struct blkio_cgroup *blkcg, void *key)
 }
 EXPORT_SYMBOL_GPL(blkiocg_lookup_group);
 
+static void blkio_reset_stats_cpu(struct blkio_group *blkg)
+{
+	struct blkio_group_stats_cpu *stats_cpu;
+	int i, j, k;
+	/*
+	 * Note: On 64 bit arch this should not be an issue. This has the
+	 * possibility of returning some inconsistent value on 32bit arch
+	 * as 64bit update on 32bit is non atomic. Taking care of this
+	 * corner case makes code very complicated, like sending IPIs to
+	 * cpus, taking care of stats of offline cpus etc.
+	 *
+	 * reset stats is anyway more of a debug feature and this sounds a
+	 * corner case. So I am not complicating the code yet until and
+	 * unless this becomes a real issue.
+	 */
+	for_each_possible_cpu(i) {
+		stats_cpu = per_cpu_ptr(blkg->stats_cpu, i);
+		stats_cpu->sectors = 0;
+		for(j = 0; j < BLKIO_STAT_CPU_NR; j++)
+			for (k = 0; k < BLKIO_STAT_TOTAL; k++)
+				stats_cpu->stat_arr_cpu[j][k] = 0;
+	}
+}
+
 static int
 blkiocg_reset_stats(struct cgroup *cgroup, struct cftype *cftype, u64 val)
 {
@@ -552,7 +617,11 @@ blkiocg_reset_stats(struct cgroup *cgroup, struct cftype *cftype, u64 val)
 		}
 #endif
 		spin_unlock(&blkg->stats_lock);
+
+		/* Reset Per cpu stats which don't take blkg->stats_lock */
+		blkio_reset_stats_cpu(blkg);
 	}
+
 	spin_unlock_irq(&blkcg->lock);
 	return 0;
 }
@@ -598,6 +667,59 @@ static uint64_t blkio_fill_stat(char *str, int chars_left, uint64_t val,
 	return val;
 }
 
+
+static uint64_t blkio_read_stat_cpu(struct blkio_group *blkg,
+			enum stat_type_cpu type, enum stat_sub_type sub_type)
+{
+	int cpu;
+	struct blkio_group_stats_cpu *stats_cpu;
+	u64 val = 0, tval;
+
+	for_each_possible_cpu(cpu) {
+		unsigned int start;
+		stats_cpu = per_cpu_ptr(blkg->stats_cpu, cpu);
+
+		do {
+			start = u64_stats_fetch_begin(&stats_cpu->syncp);
+			if (type == BLKIO_STAT_CPU_SECTORS)
+				tval = stats_cpu->sectors;
+			else
+				tval = stats_cpu->stat_arr_cpu[type][sub_type];
+		} while(u64_stats_fetch_retry(&stats_cpu->syncp, start));
+
+		val += tval;
+	}
+
+	return val;
+}
+
+static uint64_t blkio_get_stat_cpu(struct blkio_group *blkg,
+		struct cgroup_map_cb *cb, dev_t dev, enum stat_type_cpu type)
+{
+	uint64_t disk_total, val;
+	char key_str[MAX_KEY_LEN];
+	enum stat_sub_type sub_type;
+
+	if (type == BLKIO_STAT_CPU_SECTORS) {
+		val = blkio_read_stat_cpu(blkg, type, 0);
+		return blkio_fill_stat(key_str, MAX_KEY_LEN - 1, val, cb, dev);
+	}
+
+	for (sub_type = BLKIO_STAT_READ; sub_type < BLKIO_STAT_TOTAL;
+			sub_type++) {
+		blkio_get_key_name(sub_type, dev, key_str, MAX_KEY_LEN, false);
+		val = blkio_read_stat_cpu(blkg, type, sub_type);
+		cb->fill(cb, key_str, val);
+	}
+
+	disk_total = blkio_read_stat_cpu(blkg, type, BLKIO_STAT_READ) +
+			blkio_read_stat_cpu(blkg, type, BLKIO_STAT_WRITE);
+
+	blkio_get_key_name(BLKIO_STAT_TOTAL, dev, key_str, MAX_KEY_LEN, false);
+	cb->fill(cb, key_str, disk_total);
+	return disk_total;
+}
+
 /* This should be called with blkg->stats_lock held */
 static uint64_t blkio_get_stat(struct blkio_group *blkg,
 		struct cgroup_map_cb *cb, dev_t dev, enum stat_type type)
@@ -609,9 +731,6 @@ static uint64_t blkio_get_stat(struct blkio_group *blkg,
 	if (type == BLKIO_STAT_TIME)
 		return blkio_fill_stat(key_str, MAX_KEY_LEN - 1,
 					blkg->stats.time, cb, dev);
-	if (type == BLKIO_STAT_SECTORS)
-		return blkio_fill_stat(key_str, MAX_KEY_LEN - 1,
-					blkg->stats.sectors, cb, dev);
 #ifdef CONFIG_DEBUG_BLK_CGROUP
 	if (type == BLKIO_STAT_UNACCOUNTED_TIME)
 		return blkio_fill_stat(key_str, MAX_KEY_LEN - 1,
@@ -1075,8 +1194,8 @@ static int blkiocg_file_read(struct cgroup *cgrp, struct cftype *cft,
 }
 
 static int blkio_read_blkg_stats(struct blkio_cgroup *blkcg,
-		struct cftype *cft, struct cgroup_map_cb *cb, enum stat_type type,
-		bool show_total)
+		struct cftype *cft, struct cgroup_map_cb *cb,
+		enum stat_type type, bool show_total, bool pcpu)
 {
 	struct blkio_group *blkg;
 	struct hlist_node *n;
@@ -1087,10 +1206,15 @@ static int blkio_read_blkg_stats(struct blkio_cgroup *blkcg,
 		if (blkg->dev) {
 			if (!cftype_blkg_same_policy(cft, blkg))
 				continue;
-			spin_lock_irq(&blkg->stats_lock);
-			cgroup_total += blkio_get_stat(blkg, cb, blkg->dev,
-						type);
-			spin_unlock_irq(&blkg->stats_lock);
+			if (pcpu)
+				cgroup_total += blkio_get_stat_cpu(blkg, cb,
+						blkg->dev, type);
+			else {
+				spin_lock_irq(&blkg->stats_lock);
+				cgroup_total += blkio_get_stat(blkg, cb,
+						blkg->dev, type);
+				spin_unlock_irq(&blkg->stats_lock);
+			}
 		}
 	}
 	if (show_total)
@@ -1114,47 +1238,47 @@ static int blkiocg_file_read_map(struct cgroup *cgrp, struct cftype *cft,
 		switch(name) {
 		case BLKIO_PROP_time:
 			return blkio_read_blkg_stats(blkcg, cft, cb,
-						BLKIO_STAT_TIME, 0);
+						BLKIO_STAT_TIME, 0, 0);
 		case BLKIO_PROP_sectors:
 			return blkio_read_blkg_stats(blkcg, cft, cb,
-						BLKIO_STAT_SECTORS, 0);
+						BLKIO_STAT_CPU_SECTORS, 0, 1);
 		case BLKIO_PROP_io_service_bytes:
 			return blkio_read_blkg_stats(blkcg, cft, cb,
-						BLKIO_STAT_SERVICE_BYTES, 1);
+						BLKIO_STAT_CPU_SERVICE_BYTES, 1, 1);
 		case BLKIO_PROP_io_serviced:
 			return blkio_read_blkg_stats(blkcg, cft, cb,
-						BLKIO_STAT_SERVICED, 1);
+						BLKIO_STAT_CPU_SERVICED, 1, 1);
 		case BLKIO_PROP_io_service_time:
 			return blkio_read_blkg_stats(blkcg, cft, cb,
-						BLKIO_STAT_SERVICE_TIME, 1);
+						BLKIO_STAT_SERVICE_TIME, 1, 0);
 		case BLKIO_PROP_io_wait_time:
 			return blkio_read_blkg_stats(blkcg, cft, cb,
-						BLKIO_STAT_WAIT_TIME, 1);
+						BLKIO_STAT_WAIT_TIME, 1, 0);
 		case BLKIO_PROP_io_merged:
 			return blkio_read_blkg_stats(blkcg, cft, cb,
-						BLKIO_STAT_MERGED, 1);
+						BLKIO_STAT_CPU_MERGED, 1, 1);
 		case BLKIO_PROP_io_queued:
 			return blkio_read_blkg_stats(blkcg, cft, cb,
-						BLKIO_STAT_QUEUED, 1);
+						BLKIO_STAT_QUEUED, 1, 0);
 #ifdef CONFIG_DEBUG_BLK_CGROUP
 		case BLKIO_PROP_unaccounted_time:
 			return blkio_read_blkg_stats(blkcg, cft, cb,
-						BLKIO_STAT_UNACCOUNTED_TIME, 0);
+						BLKIO_STAT_UNACCOUNTED_TIME, 0, 0);
 		case BLKIO_PROP_dequeue:
 			return blkio_read_blkg_stats(blkcg, cft, cb,
-						BLKIO_STAT_DEQUEUE, 0);
+						BLKIO_STAT_DEQUEUE, 0, 0);
 		case BLKIO_PROP_avg_queue_size:
 			return blkio_read_blkg_stats(blkcg, cft, cb,
-						BLKIO_STAT_AVG_QUEUE_SIZE, 0);
+						BLKIO_STAT_AVG_QUEUE_SIZE, 0, 0);
 		case BLKIO_PROP_group_wait_time:
 			return blkio_read_blkg_stats(blkcg, cft, cb,
-						BLKIO_STAT_GROUP_WAIT_TIME, 0);
+						BLKIO_STAT_GROUP_WAIT_TIME, 0, 0);
 		case BLKIO_PROP_idle_time:
 			return blkio_read_blkg_stats(blkcg, cft, cb,
-						BLKIO_STAT_IDLE_TIME, 0);
+						BLKIO_STAT_IDLE_TIME, 0, 0);
 		case BLKIO_PROP_empty_time:
 			return blkio_read_blkg_stats(blkcg, cft, cb,
-						BLKIO_STAT_EMPTY_TIME, 0);
+						BLKIO_STAT_EMPTY_TIME, 0, 0);
 #endif
 		default:
 			BUG();
@@ -1164,10 +1288,10 @@ static int blkiocg_file_read_map(struct cgroup *cgrp, struct cftype *cft,
 		switch(name){
 		case BLKIO_THROTL_io_service_bytes:
 			return blkio_read_blkg_stats(blkcg, cft, cb,
-						BLKIO_STAT_SERVICE_BYTES, 1);
+						BLKIO_STAT_CPU_SERVICE_BYTES, 1, 1);
 		case BLKIO_THROTL_io_serviced:
 			return blkio_read_blkg_stats(blkcg, cft, cb,
-						BLKIO_STAT_SERVICED, 1);
+						BLKIO_STAT_CPU_SERVICED, 1, 1);
 		default:
 			BUG();
 		}
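Note: the per-cpu dispatch and merge stats above use the u64_stats_sync seqcount so that 64-bit counters stay readable on 32-bit machines. A minimal, self-contained sketch of that writer/reader pairing follows; the names (struct my_stat, my_add, my_read) are illustrative only and mirror the pattern in blkiocg_update_dispatch_stats() and blkio_read_stat_cpu().

/* Sketch of the u64_stats_sync writer/reader pattern, with made-up names. */
#include <linux/types.h>
#include <linux/percpu.h>
#include <linux/cpumask.h>
#include <linux/irqflags.h>
#include <linux/u64_stats_sync.h>

struct my_stat {
	u64 bytes;
	struct u64_stats_sync syncp;
};

static DEFINE_PER_CPU(struct my_stat, my_stats);

static void my_add(u64 delta)
{
	struct my_stat *s;
	unsigned long flags;

	/* writer: local IRQs off so two updates on one CPU cannot race */
	local_irq_save(flags);
	s = this_cpu_ptr(&my_stats);
	u64_stats_update_begin(&s->syncp);
	s->bytes += delta;
	u64_stats_update_end(&s->syncp);
	local_irq_restore(flags);
}

static u64 my_read(void)
{
	u64 sum = 0, tval;
	unsigned int start;
	int cpu;

	/* reader: retry a CPU's sample if a writer was mid-update */
	for_each_possible_cpu(cpu) {
		struct my_stat *s = per_cpu_ptr(&my_stats, cpu);

		do {
			start = u64_stats_fetch_begin(&s->syncp);
			tval = s->bytes;
		} while (u64_stats_fetch_retry(&s->syncp, start));
		sum += tval;
	}
	return sum;
}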
@@ -14,6 +14,7 @@
  */
 
 #include <linux/cgroup.h>
+#include <linux/u64_stats_sync.h>
 
 enum blkio_policy_id {
 	BLKIO_POLICY_PROP = 0, /* Proportional Bandwidth division */
@@ -36,22 +37,15 @@ enum stat_type {
 	 * request completion for IOs doen by this cgroup. This may not be
 	 * accurate when NCQ is turned on. */
 	BLKIO_STAT_SERVICE_TIME = 0,
-	/* Total bytes transferred */
-	BLKIO_STAT_SERVICE_BYTES,
-	/* Total IOs serviced, post merge */
-	BLKIO_STAT_SERVICED,
 	/* Total time spent waiting in scheduler queue in ns */
 	BLKIO_STAT_WAIT_TIME,
-	/* Number of IOs merged */
-	BLKIO_STAT_MERGED,
 	/* Number of IOs queued up */
 	BLKIO_STAT_QUEUED,
 	/* All the single valued stats go below this */
 	BLKIO_STAT_TIME,
-	BLKIO_STAT_SECTORS,
+#ifdef CONFIG_DEBUG_BLK_CGROUP
 	/* Time not charged to this cgroup */
 	BLKIO_STAT_UNACCOUNTED_TIME,
-#ifdef CONFIG_DEBUG_BLK_CGROUP
 	BLKIO_STAT_AVG_QUEUE_SIZE,
 	BLKIO_STAT_IDLE_TIME,
 	BLKIO_STAT_EMPTY_TIME,
@@ -60,6 +54,18 @@ enum stat_type {
 #endif
 };
 
+/* Per cpu stats */
+enum stat_type_cpu {
+	BLKIO_STAT_CPU_SECTORS,
+	/* Total bytes transferred */
+	BLKIO_STAT_CPU_SERVICE_BYTES,
+	/* Total IOs serviced, post merge */
+	BLKIO_STAT_CPU_SERVICED,
+	/* Number of IOs merged */
+	BLKIO_STAT_CPU_MERGED,
+	BLKIO_STAT_CPU_NR
+};
+
 enum stat_sub_type {
 	BLKIO_STAT_READ = 0,
 	BLKIO_STAT_WRITE,
@@ -116,11 +122,11 @@ struct blkio_cgroup {
 struct blkio_group_stats {
 	/* total disk time and nr sectors dispatched by this group */
 	uint64_t time;
-	uint64_t sectors;
-	/* Time not charged to this cgroup */
-	uint64_t unaccounted_time;
 	uint64_t stat_arr[BLKIO_STAT_QUEUED + 1][BLKIO_STAT_TOTAL];
 #ifdef CONFIG_DEBUG_BLK_CGROUP
+	/* Time not charged to this cgroup */
+	uint64_t unaccounted_time;
+
 	/* Sum of number of IOs queued across all samples */
 	uint64_t avg_queue_size_sum;
 	/* Count of samples taken for average */
@@ -145,6 +151,13 @@ struct blkio_group_stats {
 #endif
 };
 
+/* Per cpu blkio group stats */
+struct blkio_group_stats_cpu {
+	uint64_t sectors;
+	uint64_t stat_arr_cpu[BLKIO_STAT_CPU_NR][BLKIO_STAT_TOTAL];
+	struct u64_stats_sync syncp;
+};
+
 struct blkio_group {
 	/* An rcu protected unique identifier for the group */
 	void *key;
@@ -160,6 +173,8 @@ struct blkio_group {
 	/* Need to serialize the stats in the case of reset/update */
 	spinlock_t stats_lock;
 	struct blkio_group_stats stats;
+	/* Per cpu stats pointer */
+	struct blkio_group_stats_cpu __percpu *stats_cpu;
 };
 
 struct blkio_policy_node {
@@ -295,6 +310,7 @@ extern struct blkio_cgroup *task_blkio_cgroup(struct task_struct *tsk);
 extern void blkiocg_add_blkio_group(struct blkio_cgroup *blkcg,
 	struct blkio_group *blkg, void *key, dev_t dev,
 	enum blkio_policy_id plid);
+extern int blkio_alloc_blkg_stats(struct blkio_group *blkg);
 extern int blkiocg_del_blkio_group(struct blkio_group *blkg);
 extern struct blkio_group *blkiocg_lookup_group(struct blkio_cgroup *blkcg,
 						void *key);
@@ -322,6 +338,8 @@ static inline void blkiocg_add_blkio_group(struct blkio_cgroup *blkcg,
 		struct blkio_group *blkg, void *key, dev_t dev,
 		enum blkio_policy_id plid) {}
 
+static inline int blkio_alloc_blkg_stats(struct blkio_group *blkg) { return 0; }
+
 static inline int
 blkiocg_del_blkio_group(struct blkio_group *blkg) { return 0; }
 
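Note: blkio_group now carries a __percpu stats pointer that must be allocated from sleepable context and released when the group dies. A small sketch of that alloc_percpu()/free_percpu() lifecycle is below; struct demo_group and its helpers are illustrative stand-ins, not kernel APIs.

/* Sketch of the percpu allocation lifecycle used for blkg->stats_cpu. */
#include <linux/types.h>
#include <linux/percpu.h>
#include <linux/slab.h>

struct demo_stats {
	u64 sectors;
};

struct demo_group {
	struct demo_stats __percpu *stats_cpu;
};

static struct demo_group *demo_group_alloc(void)
{
	struct demo_group *grp;

	/* May sleep: must not be called under a spinlock or from IRQ context */
	grp = kzalloc(sizeof(*grp), GFP_KERNEL);
	if (!grp)
		return NULL;

	grp->stats_cpu = alloc_percpu(struct demo_stats);
	if (!grp->stats_cpu) {
		kfree(grp);
		return NULL;
	}
	return grp;
}

static void demo_group_free(struct demo_group *grp)
{
	free_percpu(grp->stats_cpu);
	kfree(grp);
}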
@@ -569,8 +569,6 @@ int blk_get_queue(struct request_queue *q)
 
 static inline void blk_free_request(struct request_queue *q, struct request *rq)
 {
-	BUG_ON(rq->cmd_flags & REQ_ON_PLUG);
-
 	if (rq->cmd_flags & REQ_ELVPRIV)
 		elv_put_request(q, rq);
 	mempool_free(rq, q->rq.rq_pool);
@@ -1110,14 +1108,6 @@ static bool bio_attempt_back_merge(struct request_queue *q, struct request *req,
 {
 	const int ff = bio->bi_rw & REQ_FAILFAST_MASK;
 
-	/*
-	 * Debug stuff, kill later
-	 */
-	if (!rq_mergeable(req)) {
-		blk_dump_rq_flags(req, "back");
-		return false;
-	}
-
 	if (!ll_back_merge_fn(q, req, bio))
 		return false;
 
@@ -1132,6 +1122,7 @@ static bool bio_attempt_back_merge(struct request_queue *q, struct request *req,
 	req->ioprio = ioprio_best(req->ioprio, bio_prio(bio));
 
 	drive_stat_acct(req, 0);
+	elv_bio_merged(q, req, bio);
 	return true;
 }
 
@@ -1141,14 +1132,6 @@ static bool bio_attempt_front_merge(struct request_queue *q,
 	const int ff = bio->bi_rw & REQ_FAILFAST_MASK;
 	sector_t sector;
 
-	/*
-	 * Debug stuff, kill later
-	 */
-	if (!rq_mergeable(req)) {
-		blk_dump_rq_flags(req, "front");
-		return false;
-	}
-
 	if (!ll_front_merge_fn(q, req, bio))
 		return false;
 
@@ -1173,6 +1156,7 @@ static bool bio_attempt_front_merge(struct request_queue *q,
 	req->ioprio = ioprio_best(req->ioprio, bio_prio(bio));
 
 	drive_stat_acct(req, 0);
+	elv_bio_merged(q, req, bio);
 	return true;
 }
 
@@ -1258,14 +1242,12 @@ static int __make_request(struct request_queue *q, struct bio *bio)
 
 	el_ret = elv_merge(q, &req, bio);
 	if (el_ret == ELEVATOR_BACK_MERGE) {
-		BUG_ON(req->cmd_flags & REQ_ON_PLUG);
 		if (bio_attempt_back_merge(q, req, bio)) {
 			if (!attempt_back_merge(q, req))
 				elv_merged_request(q, req, el_ret);
 			goto out_unlock;
 		}
 	} else if (el_ret == ELEVATOR_FRONT_MERGE) {
-		BUG_ON(req->cmd_flags & REQ_ON_PLUG);
 		if (bio_attempt_front_merge(q, req, bio)) {
 			if (!attempt_front_merge(q, req))
 				elv_merged_request(q, req, el_ret);
@@ -1320,10 +1302,6 @@ get_rq:
 			if (__rq->q != q)
 				plug->should_sort = 1;
 		}
-		/*
-		 * Debug flag, kill later
-		 */
-		req->cmd_flags |= REQ_ON_PLUG;
 		list_add_tail(&req->queuelist, &plug->list);
 		drive_stat_acct(req, 1);
 	} else {
@@ -1550,7 +1528,8 @@ static inline void __generic_make_request(struct bio *bio)
 			goto end_io;
 		}
 
-		blk_throtl_bio(q, &bio);
+		if (blk_throtl_bio(q, &bio))
+			goto end_io;
 
 		/*
 		 * If bio = NULL, bio has been throttled and will be submitted
@@ -2748,7 +2727,6 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule)
 	while (!list_empty(&list)) {
 		rq = list_entry_rq(list.next);
 		list_del_init(&rq->queuelist);
-		BUG_ON(!(rq->cmd_flags & REQ_ON_PLUG));
 		BUG_ON(!rq->q);
 		if (rq->q != q) {
 			/*
@@ -2760,8 +2738,6 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule)
 			depth = 0;
 			spin_lock(q->queue_lock);
 		}
-		rq->cmd_flags &= ~REQ_ON_PLUG;
-
 		/*
 		 * rq is already accounted, so use raw insert
 		 */
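Note: the REQ_ON_PLUG flag removed above was only a debug marker for requests sitting on an on-stack plug list. For context, a minimal sketch of how a submitter drives such a plug is shown below; the wrapper function submit_batch() is illustrative only.

/* Sketch: batch bio submission around an on-stack plug. */
#include <linux/blkdev.h>
#include <linux/bio.h>
#include <linux/fs.h>

static void submit_batch(struct bio **bios, int nr)
{
	struct blk_plug plug;
	int i;

	/* Requests queue up on the plug list instead of being dispatched
	 * to the driver one by one. */
	blk_start_plug(&plug);
	for (i = 0; i < nr; i++)
		submit_bio(READ, bios[i]);
	/* Flushing the plug hands the (possibly merged/sorted) requests
	 * to the driver. */
	blk_finish_plug(&plug);
}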
@@ -56,7 +56,7 @@ void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk,
 	spin_lock_irq(q->queue_lock);
 	__elv_add_request(q, rq, where);
 	__blk_run_queue(q);
-	/* the queue is stopped so it won't be plugged+unplugged */
+	/* the queue is stopped so it won't be run */
 	if (rq->cmd_type == REQ_TYPE_PM_RESUME)
 		q->request_fn(q);
 	spin_unlock_irq(q->queue_lock);
@@ -212,13 +212,19 @@ static void flush_end_io(struct request *flush_rq, int error)
 	}
 
 	/*
-	 * Moving a request silently to empty queue_head may stall the
-	 * queue. Kick the queue in those cases. This function is called
-	 * from request completion path and calling directly into
-	 * request_fn may confuse the driver. Always use kblockd.
+	 * Kick the queue to avoid stall for two cases:
+	 * 1. Moving a request silently to empty queue_head may stall the
+	 * queue.
+	 * 2. When flush request is running in non-queueable queue, the
+	 * queue is hold. Restart the queue after flush request is finished
+	 * to avoid stall.
+	 * This function is called from request completion path and calling
+	 * directly into request_fn may confuse the driver. Always use
+	 * kblockd.
 	 */
-	if (queued)
+	if (queued || q->flush_queue_delayed)
 		blk_run_queue_async(q);
+	q->flush_queue_delayed = 0;
 }
 
 /**
@@ -96,6 +96,9 @@ struct io_context *alloc_io_context(gfp_t gfp_flags, int node)
 		INIT_RADIX_TREE(&ret->radix_root, GFP_ATOMIC | __GFP_HIGH);
 		INIT_HLIST_HEAD(&ret->cic_list);
 		ret->ioc_data = NULL;
+#if defined(CONFIG_BLK_CGROUP) || defined(CONFIG_BLK_CGROUP_MODULE)
+		ret->cgroup_changed = 0;
+#endif
 	}
 
 	return ret;
@@ -9,17 +9,20 @@
 
 #include "blk.h"
 
-static void blkdev_discard_end_io(struct bio *bio, int err)
+struct bio_batch {
+	atomic_t		done;
+	unsigned long		flags;
+	struct completion	*wait;
+};
+
+static void bio_batch_end_io(struct bio *bio, int err)
 {
-	if (err) {
-		if (err == -EOPNOTSUPP)
-			set_bit(BIO_EOPNOTSUPP, &bio->bi_flags);
-		clear_bit(BIO_UPTODATE, &bio->bi_flags);
-	}
-
-	if (bio->bi_private)
-		complete(bio->bi_private);
+	struct bio_batch *bb = bio->bi_private;
 
+	if (err && (err != -EOPNOTSUPP))
+		clear_bit(BIO_UPTODATE, &bb->flags);
+	if (atomic_dec_and_test(&bb->done))
+		complete(bb->wait);
 	bio_put(bio);
 }
 
@@ -41,6 +44,7 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
 	struct request_queue *q = bdev_get_queue(bdev);
 	int type = REQ_WRITE | REQ_DISCARD;
 	unsigned int max_discard_sectors;
+	struct bio_batch bb;
 	struct bio *bio;
 	int ret = 0;
 
@@ -67,7 +71,11 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
 		type |= REQ_SECURE;
 	}
 
-	while (nr_sects && !ret) {
+	atomic_set(&bb.done, 1);
+	bb.flags = 1 << BIO_UPTODATE;
+	bb.wait = &wait;
+
+	while (nr_sects) {
 		bio = bio_alloc(gfp_mask, 1);
 		if (!bio) {
 			ret = -ENOMEM;
@@ -75,9 +83,9 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
 		}
 
 		bio->bi_sector = sector;
-		bio->bi_end_io = blkdev_discard_end_io;
+		bio->bi_end_io = bio_batch_end_io;
 		bio->bi_bdev = bdev;
-		bio->bi_private = &wait;
+		bio->bi_private = &bb;
 
 		if (nr_sects > max_discard_sectors) {
 			bio->bi_size = max_discard_sectors << 9;
@@ -88,45 +96,21 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
 			nr_sects = 0;
 		}
 
-		bio_get(bio);
+		atomic_inc(&bb.done);
 		submit_bio(type, bio);
+	}
+
+	/* Wait for bios in-flight */
+	if (!atomic_dec_and_test(&bb.done))
+		wait_for_completion(&wait);
 
-		wait_for_completion(&wait);
-
-		if (bio_flagged(bio, BIO_EOPNOTSUPP))
-			ret = -EOPNOTSUPP;
-		else if (!bio_flagged(bio, BIO_UPTODATE))
-			ret = -EIO;
-		bio_put(bio);
-	}
+	if (!test_bit(BIO_UPTODATE, &bb.flags))
+		ret = -EIO;
 
 	return ret;
 }
 EXPORT_SYMBOL(blkdev_issue_discard);
 
-struct bio_batch
-{
-	atomic_t		done;
-	unsigned long		flags;
-	struct completion	*wait;
-};
-
-static void bio_batch_end_io(struct bio *bio, int err)
-{
-	struct bio_batch *bb = bio->bi_private;
-
-	if (err) {
-		if (err == -EOPNOTSUPP)
-			set_bit(BIO_EOPNOTSUPP, &bb->flags);
-		else
-			clear_bit(BIO_UPTODATE, &bb->flags);
-	}
-	if (bb)
-		if (atomic_dec_and_test(&bb->done))
-			complete(bb->wait);
-	bio_put(bio);
-}
-
 /**
  * blkdev_issue_zeroout - generate number of zero filed write bios
  * @bdev:	blockdev to issue
@@ -151,7 +135,6 @@ int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
 	bb.flags = 1 << BIO_UPTODATE;
 	bb.wait = &wait;
 
-submit:
 	ret = 0;
 	while (nr_sects != 0) {
 		bio = bio_alloc(gfp_mask,
@@ -168,9 +151,6 @@ submit:
 
 		while (nr_sects != 0) {
 			sz = min((sector_t) PAGE_SIZE >> 9 , nr_sects);
-			if (sz == 0)
-				/* bio has maximum size possible */
-				break;
 			ret = bio_add_page(bio, ZERO_PAGE(0), sz << 9, 0);
 			nr_sects -= ret >> 9;
 			sector += ret >> 9;
@@ -190,16 +170,6 @@ submit:
 		/* One of bios in the batch was completed with error.*/
 		ret = -EIO;
 
-	if (ret)
-		goto out;
-
-	if (test_bit(BIO_EOPNOTSUPP, &bb.flags)) {
-		ret = -EOPNOTSUPP;
-		goto out;
-	}
-	if (nr_sects != 0)
-		goto submit;
-out:
 	return ret;
 }
 EXPORT_SYMBOL(blkdev_issue_zeroout);
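Note: with the bio_batch rework above, blkdev_issue_discard() submits all bios before waiting and reports -EIO rather than -EOPNOTSUPP on failure. A hedged caller sketch follows; the wrapper name, the 1 MiB range, and the GFP_KERNEL/0-flags choices are assumptions for illustration.

/* Sketch: discard the first 1 MiB of a block device. */
#include <linux/blkdev.h>
#include <linux/gfp.h>
#include <linux/kernel.h>

static int demo_discard_head(struct block_device *bdev)
{
	sector_t start = 0;
	sector_t nr_sects = (1024 * 1024) >> 9;	/* 1 MiB in 512-byte sectors */
	int ret;

	ret = blkdev_issue_discard(bdev, start, nr_sects, GFP_KERNEL, 0);
	if (ret)
		printk(KERN_WARNING "discard failed: %d\n", ret);
	return ret;
}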
@@ -120,7 +120,7 @@ void blk_set_default_limits(struct queue_limits *lim)
 	lim->discard_granularity = 0;
 	lim->discard_alignment = 0;
 	lim->discard_misaligned = 0;
-	lim->discard_zeroes_data = -1;
+	lim->discard_zeroes_data = 1;
 	lim->logical_block_size = lim->physical_block_size = lim->io_min = 512;
 	lim->bounce_pfn = (unsigned long)(BLK_BOUNCE_ANY >> PAGE_SHIFT);
 	lim->alignment_offset = 0;
@@ -166,6 +166,7 @@ void blk_queue_make_request(struct request_queue *q, make_request_fn *mfn)
 
 	blk_set_default_limits(&q->limits);
 	blk_queue_max_hw_sectors(q, BLK_SAFE_MAX_SECTORS);
+	q->limits.discard_zeroes_data = 0;
 
 	/*
 	 * by default assume old behaviour and bounce for any highmem page
@@ -790,6 +791,12 @@ void blk_queue_flush(struct request_queue *q, unsigned int flush)
 }
 EXPORT_SYMBOL_GPL(blk_queue_flush);
 
+void blk_queue_flush_queueable(struct request_queue *q, bool queueable)
+{
+	q->flush_not_queueable = !queueable;
+}
+EXPORT_SYMBOL_GPL(blk_queue_flush_queueable);
+
 static int __init blk_settings_init(void)
 {
 	blk_max_low_pfn = max_low_pfn - 1;
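Note: blk_queue_flush_queueable() lets a driver declare that flush requests cannot be queued alongside other commands, which is what enables the queue-hold optimization in the flush path. A minimal, hypothetical driver-init sketch follows; the function name and the decision to pass false are assumptions for illustration.

/* Sketch: a driver marking its queue's flush requests as non-queueable. */
#include <linux/blkdev.h>

static void demo_driver_setup_queue(struct request_queue *q)
{
	/* Advertise cache flush support (FLUSH + FUA)... */
	blk_queue_flush(q, REQ_FLUSH | REQ_FUA);
	/* ...but tell the block layer a flush cannot be queued with other
	 * requests, so it may hold the queue while one is in flight. */
	blk_queue_flush_queueable(q, false);
}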
@@ -152,7 +152,8 @@ static ssize_t queue_discard_granularity_show(struct request_queue *q, char *pag
 
 static ssize_t queue_discard_max_show(struct request_queue *q, char *page)
 {
-	return queue_var_show(q->limits.max_discard_sectors << 9, page);
+	return sprintf(page, "%llu\n",
+		       (unsigned long long)q->limits.max_discard_sectors << 9);
 }
 
 static ssize_t queue_discard_zeroes_data_show(struct request_queue *q, char *page)
@ -78,6 +78,8 @@ struct throtl_grp {
|
||||||
|
|
||||||
/* Some throttle limits got updated for the group */
|
/* Some throttle limits got updated for the group */
|
||||||
int limits_changed;
|
int limits_changed;
|
||||||
|
|
||||||
|
struct rcu_head rcu_head;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct throtl_data
|
struct throtl_data
|
||||||
|
@ -88,7 +90,7 @@ struct throtl_data
|
||||||
/* service tree for active throtl groups */
|
/* service tree for active throtl groups */
|
||||||
struct throtl_rb_root tg_service_tree;
|
struct throtl_rb_root tg_service_tree;
|
||||||
|
|
||||||
struct throtl_grp root_tg;
|
struct throtl_grp *root_tg;
|
||||||
struct request_queue *queue;
|
struct request_queue *queue;
|
||||||
|
|
||||||
/* Total Number of queued bios on READ and WRITE lists */
|
/* Total Number of queued bios on READ and WRITE lists */
|
||||||
|
@ -151,56 +153,44 @@ static inline struct throtl_grp *throtl_ref_get_tg(struct throtl_grp *tg)
|
||||||
return tg;
|
return tg;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void throtl_free_tg(struct rcu_head *head)
|
||||||
|
{
|
||||||
|
struct throtl_grp *tg;
|
||||||
|
|
||||||
|
tg = container_of(head, struct throtl_grp, rcu_head);
|
||||||
|
free_percpu(tg->blkg.stats_cpu);
|
||||||
|
kfree(tg);
|
||||||
|
}
|
||||||
|
|
||||||
static void throtl_put_tg(struct throtl_grp *tg)
|
static void throtl_put_tg(struct throtl_grp *tg)
|
||||||
{
|
{
|
||||||
BUG_ON(atomic_read(&tg->ref) <= 0);
|
BUG_ON(atomic_read(&tg->ref) <= 0);
|
||||||
if (!atomic_dec_and_test(&tg->ref))
|
if (!atomic_dec_and_test(&tg->ref))
|
||||||
return;
|
return;
|
||||||
kfree(tg);
|
|
||||||
|
/*
|
||||||
|
* A group is freed in rcu manner. But having an rcu lock does not
|
||||||
|
* mean that one can access all the fields of blkg and assume these
|
||||||
|
* are valid. For example, don't try to follow throtl_data and
|
||||||
|
* request queue links.
|
||||||
|
*
|
||||||
|
* Having a reference to blkg under an rcu allows acess to only
|
||||||
|
* values local to groups like group stats and group rate limits
|
||||||
|
*/
|
||||||
|
call_rcu(&tg->rcu_head, throtl_free_tg);
|
||||||
}
|
}
|
||||||
|
|
||||||
static struct throtl_grp * throtl_find_alloc_tg(struct throtl_data *td,
|
static void throtl_init_group(struct throtl_grp *tg)
|
||||||
struct blkio_cgroup *blkcg)
|
|
||||||
{
|
{
|
||||||
struct throtl_grp *tg = NULL;
|
|
||||||
void *key = td;
|
|
||||||
struct backing_dev_info *bdi = &td->queue->backing_dev_info;
|
|
||||||
unsigned int major, minor;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* TODO: Speed up blkiocg_lookup_group() by maintaining a radix
|
|
||||||
* tree of blkg (instead of traversing through hash list all
|
|
||||||
* the time.
|
|
||||||
*/
|
|
||||||
|
|
||||||
/*
|
|
||||||
* This is the common case when there are no blkio cgroups.
|
|
||||||
* Avoid lookup in this case
|
|
||||||
*/
|
|
||||||
if (blkcg == &blkio_root_cgroup)
|
|
||||||
tg = &td->root_tg;
|
|
||||||
else
|
|
||||||
tg = tg_of_blkg(blkiocg_lookup_group(blkcg, key));
|
|
||||||
|
|
||||||
/* Fill in device details for root group */
|
|
||||||
if (tg && !tg->blkg.dev && bdi->dev && dev_name(bdi->dev)) {
|
|
||||||
sscanf(dev_name(bdi->dev), "%u:%u", &major, &minor);
|
|
||||||
tg->blkg.dev = MKDEV(major, minor);
|
|
||||||
goto done;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (tg)
|
|
||||||
goto done;
|
|
||||||
|
|
||||||
tg = kzalloc_node(sizeof(*tg), GFP_ATOMIC, td->queue->node);
|
|
||||||
if (!tg)
|
|
||||||
goto done;
|
|
||||||
|
|
||||||
INIT_HLIST_NODE(&tg->tg_node);
|
INIT_HLIST_NODE(&tg->tg_node);
|
||||||
RB_CLEAR_NODE(&tg->rb_node);
|
RB_CLEAR_NODE(&tg->rb_node);
|
||||||
bio_list_init(&tg->bio_lists[0]);
|
bio_list_init(&tg->bio_lists[0]);
|
||||||
bio_list_init(&tg->bio_lists[1]);
|
bio_list_init(&tg->bio_lists[1]);
|
||||||
td->limits_changed = false;
|
tg->limits_changed = false;
|
||||||
|
|
||||||
|
/* Practically unlimited BW */
|
||||||
|
tg->bps[0] = tg->bps[1] = -1;
|
||||||
|
tg->iops[0] = tg->iops[1] = -1;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Take the initial reference that will be released on destroy
|
* Take the initial reference that will be released on destroy
|
||||||
|
@ -209,33 +199,181 @@ static struct throtl_grp * throtl_find_alloc_tg(struct throtl_data *td,
|
||||||
* exit or cgroup deletion path depending on who is exiting first.
|
* exit or cgroup deletion path depending on who is exiting first.
|
||||||
*/
|
*/
|
||||||
atomic_set(&tg->ref, 1);
|
atomic_set(&tg->ref, 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Should be called with rcu read lock held (needed for blkcg) */
|
||||||
|
static void
|
||||||
|
throtl_add_group_to_td_list(struct throtl_data *td, struct throtl_grp *tg)
|
||||||
|
{
|
||||||
|
hlist_add_head(&tg->tg_node, &td->tg_list);
|
||||||
|
td->nr_undestroyed_grps++;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
__throtl_tg_fill_dev_details(struct throtl_data *td, struct throtl_grp *tg)
|
||||||
|
{
|
||||||
|
struct backing_dev_info *bdi = &td->queue->backing_dev_info;
|
||||||
|
unsigned int major, minor;
|
||||||
|
|
||||||
|
if (!tg || tg->blkg.dev)
|
||||||
|
return;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Fill in device details for a group which might not have been
|
||||||
|
* filled at group creation time as queue was being instantiated
|
||||||
|
* and driver had not attached a device yet
|
||||||
|
*/
|
||||||
|
if (bdi->dev && dev_name(bdi->dev)) {
|
||||||
|
sscanf(dev_name(bdi->dev), "%u:%u", &major, &minor);
|
||||||
|
tg->blkg.dev = MKDEV(major, minor);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Should be called with without queue lock held. Here queue lock will be
|
||||||
|
* taken rarely. It will be taken only once during life time of a group
|
||||||
|
* if need be
|
||||||
|
*/
|
||||||
|
static void
|
||||||
|
throtl_tg_fill_dev_details(struct throtl_data *td, struct throtl_grp *tg)
|
||||||
|
{
|
||||||
|
if (!tg || tg->blkg.dev)
|
||||||
|
return;
|
||||||
|
|
||||||
|
spin_lock_irq(td->queue->queue_lock);
|
||||||
|
__throtl_tg_fill_dev_details(td, tg);
|
||||||
|
spin_unlock_irq(td->queue->queue_lock);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void throtl_init_add_tg_lists(struct throtl_data *td,
|
||||||
|
struct throtl_grp *tg, struct blkio_cgroup *blkcg)
|
||||||
|
{
|
||||||
|
__throtl_tg_fill_dev_details(td, tg);
|
||||||
|
|
||||||
/* Add group onto cgroup list */
|
/* Add group onto cgroup list */
|
||||||
sscanf(dev_name(bdi->dev), "%u:%u", &major, &minor);
|
|
||||||
blkiocg_add_blkio_group(blkcg, &tg->blkg, (void *)td,
|
blkiocg_add_blkio_group(blkcg, &tg->blkg, (void *)td,
|
||||||
MKDEV(major, minor), BLKIO_POLICY_THROTL);
|
tg->blkg.dev, BLKIO_POLICY_THROTL);
|
||||||
|
|
||||||
tg->bps[READ] = blkcg_get_read_bps(blkcg, tg->blkg.dev);
|
tg->bps[READ] = blkcg_get_read_bps(blkcg, tg->blkg.dev);
|
||||||
tg->bps[WRITE] = blkcg_get_write_bps(blkcg, tg->blkg.dev);
|
tg->bps[WRITE] = blkcg_get_write_bps(blkcg, tg->blkg.dev);
|
||||||
tg->iops[READ] = blkcg_get_read_iops(blkcg, tg->blkg.dev);
|
tg->iops[READ] = blkcg_get_read_iops(blkcg, tg->blkg.dev);
|
||||||
tg->iops[WRITE] = blkcg_get_write_iops(blkcg, tg->blkg.dev);
|
tg->iops[WRITE] = blkcg_get_write_iops(blkcg, tg->blkg.dev);
|
||||||
|
|
||||||
hlist_add_head(&tg->tg_node, &td->tg_list);
|
throtl_add_group_to_td_list(td, tg);
|
||||||
td->nr_undestroyed_grps++;
|
}
|
||||||
done:
|
|
||||||
|
/* Should be called without queue lock and outside of rcu period */
|
||||||
|
static struct throtl_grp *throtl_alloc_tg(struct throtl_data *td)
|
||||||
|
{
|
||||||
|
struct throtl_grp *tg = NULL;
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
tg = kzalloc_node(sizeof(*tg), GFP_ATOMIC, td->queue->node);
|
||||||
|
if (!tg)
|
||||||
|
return NULL;
|
||||||
|
|
||||||
|
ret = blkio_alloc_blkg_stats(&tg->blkg);
|
||||||
|
|
||||||
|
if (ret) {
|
||||||
|
kfree(tg);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
throtl_init_group(tg);
|
||||||
return tg;
|
return tg;
|
||||||
}
|
}
|
||||||
|
|
||||||
static struct throtl_grp * throtl_get_tg(struct throtl_data *td)
|
static struct
|
||||||
|
throtl_grp *throtl_find_tg(struct throtl_data *td, struct blkio_cgroup *blkcg)
|
||||||
{
|
{
|
||||||
struct throtl_grp *tg = NULL;
|
struct throtl_grp *tg = NULL;
|
||||||
|
void *key = td;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* This is the common case when there are no blkio cgroups.
|
||||||
|
* Avoid lookup in this case
|
||||||
|
*/
|
||||||
|
if (blkcg == &blkio_root_cgroup)
|
||||||
|
tg = td->root_tg;
|
||||||
|
else
|
||||||
|
tg = tg_of_blkg(blkiocg_lookup_group(blkcg, key));
|
||||||
|
|
||||||
|
__throtl_tg_fill_dev_details(td, tg);
|
||||||
|
return tg;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* This function returns with queue lock unlocked in case of error, like
|
||||||
|
* request queue is no more
|
||||||
|
*/
|
||||||
|
static struct throtl_grp * throtl_get_tg(struct throtl_data *td)
|
||||||
|
{
|
||||||
|
struct throtl_grp *tg = NULL, *__tg = NULL;
|
||||||
struct blkio_cgroup *blkcg;
|
struct blkio_cgroup *blkcg;
|
||||||
|
struct request_queue *q = td->queue;
|
||||||
|
|
||||||
rcu_read_lock();
|
rcu_read_lock();
|
||||||
blkcg = task_blkio_cgroup(current);
|
blkcg = task_blkio_cgroup(current);
|
||||||
tg = throtl_find_alloc_tg(td, blkcg);
|
tg = throtl_find_tg(td, blkcg);
|
||||||
if (!tg)
|
if (tg) {
|
||||||
tg = &td->root_tg;
|
rcu_read_unlock();
|
||||||
|
return tg;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Need to allocate a group. Allocation of group also needs allocation
|
||||||
|
* of per cpu stats which in-turn takes a mutex() and can block. Hence
|
||||||
|
* we need to drop rcu lock and queue_lock before we call alloc
|
||||||
|
*
|
||||||
|
* Take the request queue reference to make sure queue does not
|
||||||
|
* go away once we return from allocation.
|
||||||
|
*/
|
||||||
|
blk_get_queue(q);
|
||||||
|
rcu_read_unlock();
|
||||||
|
spin_unlock_irq(q->queue_lock);
|
||||||
|
|
||||||
|
tg = throtl_alloc_tg(td);
|
||||||
|
/*
|
||||||
|
* We might have slept in group allocation. Make sure queue is not
|
||||||
|
* dead
|
||||||
|
*/
|
||||||
|
if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) {
|
||||||
|
blk_put_queue(q);
|
||||||
|
if (tg)
|
||||||
|
kfree(tg);
|
||||||
|
|
||||||
|
return ERR_PTR(-ENODEV);
|
||||||
|
}
|
||||||
|
blk_put_queue(q);
|
||||||
|
|
||||||
|
/* Group allocated and queue is still alive. take the lock */
|
||||||
|
spin_lock_irq(q->queue_lock);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Initialize the new group. After sleeping, read the blkcg again.
|
||||||
|
*/
|
||||||
|
rcu_read_lock();
|
||||||
|
blkcg = task_blkio_cgroup(current);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* If some other thread already allocated the group while we were
|
||||||
|
* not holding queue lock, free up the group
|
||||||
|
*/
|
||||||
|
__tg = throtl_find_tg(td, blkcg);
|
||||||
|
|
||||||
|
if (__tg) {
|
||||||
|
kfree(tg);
|
||||||
|
rcu_read_unlock();
|
||||||
|
return __tg;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Group allocation failed. Account the IO to root group */
|
||||||
|
if (!tg) {
|
||||||
|
tg = td->root_tg;
|
||||||
|
return tg;
|
||||||
|
}
|
||||||
|
|
||||||
|
throtl_init_add_tg_lists(td, tg, blkcg);
|
||||||
rcu_read_unlock();
|
rcu_read_unlock();
|
||||||
return tg;
|
return tg;
|
||||||
}
|
}
|
||||||
|
@ -544,6 +682,12 @@ static bool tg_with_in_bps_limit(struct throtl_data *td, struct throtl_grp *tg,
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static bool tg_no_rule_group(struct throtl_grp *tg, bool rw) {
|
||||||
|
if (tg->bps[rw] == -1 && tg->iops[rw] == -1)
|
||||||
|
return 1;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Returns whether one can dispatch a bio or not. Also returns approx number
|
* Returns whether one can dispatch a bio or not. Also returns approx number
|
||||||
* of jiffies to wait before this bio is with-in IO rate and can be dispatched
|
* of jiffies to wait before this bio is with-in IO rate and can be dispatched
|
||||||
|
@ -608,10 +752,6 @@ static void throtl_charge_bio(struct throtl_grp *tg, struct bio *bio)
|
||||||
tg->bytes_disp[rw] += bio->bi_size;
|
tg->bytes_disp[rw] += bio->bi_size;
|
||||||
tg->io_disp[rw]++;
|
tg->io_disp[rw]++;
|
||||||
|
|
||||||
/*
|
|
||||||
* TODO: This will take blkg->stats_lock. Figure out a way
|
|
||||||
* to avoid this cost.
|
|
||||||
*/
|
|
||||||
blkiocg_update_dispatch_stats(&tg->blkg, bio->bi_size, rw, sync);
|
blkiocg_update_dispatch_stats(&tg->blkg, bio->bi_size, rw, sync);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -989,15 +1129,51 @@ int blk_throtl_bio(struct request_queue *q, struct bio **biop)
 	struct throtl_grp *tg;
 	struct bio *bio = *biop;
 	bool rw = bio_data_dir(bio), update_disptime = true;
+	struct blkio_cgroup *blkcg;
 
 	if (bio->bi_rw & REQ_THROTTLED) {
 		bio->bi_rw &= ~REQ_THROTTLED;
 		return 0;
 	}
 
+	/*
+	 * A throtl_grp pointer retrieved under rcu can be used to access
+	 * basic fields like stats and io rates. If a group has no rules,
+	 * just update the dispatch stats in lockless manner and return.
+	 */
+
+	rcu_read_lock();
+	blkcg = task_blkio_cgroup(current);
+	tg = throtl_find_tg(td, blkcg);
+	if (tg) {
+		throtl_tg_fill_dev_details(td, tg);
+
+		if (tg_no_rule_group(tg, rw)) {
+			blkiocg_update_dispatch_stats(&tg->blkg, bio->bi_size,
+					rw, bio->bi_rw & REQ_SYNC);
+			rcu_read_unlock();
+			return 0;
+		}
+	}
+	rcu_read_unlock();
+
+	/*
+	 * Either group has not been allocated yet or it is not an unlimited
+	 * IO group
+	 */
+
 	spin_lock_irq(q->queue_lock);
 	tg = throtl_get_tg(td);
+
+	if (IS_ERR(tg)) {
+		if (PTR_ERR(tg) == -ENODEV) {
+			/*
+			 * Queue is gone. No queue lock held here.
+			 */
+			return -ENODEV;
+		}
+	}
 
 	if (tg->nr_queued[rw]) {
 		/*
 		 * There is already another bio queued in same dir. No
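The new submission path only takes queue_lock when the bio actually has to be throttled; groups without rules are accounted under rcu_read_lock() alone. The sketch below models that fast-path/slow-path split with C11 atomics in userspace; it is a loose analogue only, since the lockless read is safe here solely because the object is never freed, which is exactly the guarantee the kernel code gets from RCU grace periods (all names are illustrative):

#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

/* Stand-in for struct throtl_grp: only stats and limits are read on the fast path. */
struct group {
	_Atomic unsigned long long bytes_dispatched;
	bool has_rules;
};

static struct group root_group;			/* zero-initialized: no rules yet */
static _Atomic(struct group *) current_group;	/* published group pointer */
static pthread_mutex_t queue_lock = PTHREAD_MUTEX_INITIALIZER;

/*
 * Fast path: read the published pointer and bump per-group stats without
 * taking queue_lock.  Safe here only because the group is never freed.
 */
static bool submit_fast(unsigned int bytes)
{
	struct group *g = atomic_load(&current_group);

	if (g && !g->has_rules) {
		atomic_fetch_add(&g->bytes_dispatched, bytes);
		return true;		/* dispatched, no throttling needed */
	}
	return false;
}

/* Slow path: anything with rules (or no group yet) goes through the lock. */
static void submit(unsigned int bytes)
{
	if (submit_fast(bytes))
		return;

	pthread_mutex_lock(&queue_lock);
	/* ... allocate the group / queue the request under the lock ... */
	pthread_mutex_unlock(&queue_lock);
}

int main(void)
{
	atomic_store(&current_group, &root_group);
	submit(4096);
	printf("dispatched %llu bytes locklessly\n",
	       (unsigned long long)atomic_load(&root_group.bytes_dispatched));
	return 0;
}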
@@ -1060,39 +1236,24 @@ int blk_throtl_init(struct request_queue *q)
 	INIT_HLIST_HEAD(&td->tg_list);
 	td->tg_service_tree = THROTL_RB_ROOT;
 	td->limits_changed = false;
-
-	/* Init root group */
-	tg = &td->root_tg;
-	INIT_HLIST_NODE(&tg->tg_node);
-	RB_CLEAR_NODE(&tg->rb_node);
-	bio_list_init(&tg->bio_lists[0]);
-	bio_list_init(&tg->bio_lists[1]);
-
-	/* Practically unlimited BW */
-	tg->bps[0] = tg->bps[1] = -1;
-	tg->iops[0] = tg->iops[1] = -1;
-	td->limits_changed = false;
-
-	/*
-	 * Set root group reference to 2. One reference will be dropped when
-	 * all groups on tg_list are being deleted during queue exit. Other
-	 * reference will remain there as we don't want to delete this group
-	 * as it is statically allocated and gets destroyed when throtl_data
-	 * goes away.
-	 */
-	atomic_set(&tg->ref, 2);
-	hlist_add_head(&tg->tg_node, &td->tg_list);
-	td->nr_undestroyed_grps++;
-
 	INIT_DELAYED_WORK(&td->throtl_work, blk_throtl_work);
 
+	/* alloc and Init root group. */
+	td->queue = q;
+	tg = throtl_alloc_tg(td);
+
+	if (!tg) {
+		kfree(td);
+		return -ENOMEM;
+	}
+
+	td->root_tg = tg;
+
 	rcu_read_lock();
-	blkiocg_add_blkio_group(&blkio_root_cgroup, &tg->blkg, (void *)td,
-					0, BLKIO_POLICY_THROTL);
+	throtl_init_add_tg_lists(td, tg, &blkio_root_cgroup);
 	rcu_read_unlock();
 
 	/* Attach throtl data to request queue */
-	td->queue = q;
 	q->td = td;
 	return 0;
 }
block/blk.h | 23

@@ -62,7 +62,28 @@ static inline struct request *__elv_next_request(struct request_queue *q)
 			return rq;
 		}
 
-		if (!q->elevator->ops->elevator_dispatch_fn(q, 0))
+		/*
+		 * Flush request is running and flush request isn't queueable
+		 * in the drive, we can hold the queue till flush request is
+		 * finished. Even we don't do this, driver can't dispatch next
+		 * requests and will requeue them. And this can improve
+		 * throughput too. For example, we have request flush1, write1,
+		 * flush 2. flush1 is dispatched, then queue is hold, write1
+		 * isn't inserted to queue. After flush1 is finished, flush2
+		 * will be dispatched. Since disk cache is already clean,
+		 * flush2 will be finished very soon, so looks like flush2 is
+		 * folded to flush1.
+		 * Since the queue is hold, a flag is set to indicate the queue
+		 * should be restarted later. Please see flush_end_io() for
+		 * details.
+		 */
+		if (q->flush_pending_idx != q->flush_running_idx &&
+				!queue_flush_queueable(q)) {
+			q->flush_queue_delayed = 1;
+			return NULL;
+		}
+		if (test_bit(QUEUE_FLAG_DEAD, &q->queue_flags) ||
+		    !q->elevator->ops->elevator_dispatch_fn(q, 0))
 			return NULL;
 	}
 }
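The added block in __elv_next_request() is the entire hold-the-queue mechanism: if a flush is in flight (pending and running indexes differ) and the device cannot queue another flush, dispatch is deferred and flush_queue_delayed asks flush_end_io() to restart the queue later. A compact standalone model of that decision, using a toy structure rather than struct request_queue:

#include <stdbool.h>
#include <stdio.h>

struct toy_queue {
	unsigned int flush_pending_idx:1;
	unsigned int flush_running_idx:1;
	unsigned int flush_not_queueable:1;
	unsigned int flush_queue_delayed:1;
};

static bool flush_queueable(const struct toy_queue *q)
{
	return !q->flush_not_queueable;	/* same logic as queue_flush_queueable() */
}

/* Returns true when dispatch should be held back until the flush completes. */
static bool hold_dispatch(struct toy_queue *q)
{
	if (q->flush_pending_idx != q->flush_running_idx && !flush_queueable(q)) {
		q->flush_queue_delayed = 1;	/* completion path restarts us */
		return true;
	}
	return false;
}

int main(void)
{
	struct toy_queue q = { .flush_pending_idx = 1, .flush_running_idx = 0,
			       .flush_not_queueable = 1 };

	printf("hold queue: %s\n", hold_dispatch(&q) ? "yes" : "no");
	return 0;
}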
@@ -300,7 +300,9 @@ struct cfq_data {
 
 	/* List of cfq groups being managed on this device*/
 	struct hlist_head cfqg_list;
-	struct rcu_head rcu;
+
+	/* Number of groups which are on blkcg->blkg_list */
+	unsigned int nr_blkcg_linked_grps;
 };
 
 static struct cfq_group *cfq_get_next_cfqg(struct cfq_data *cfqd);
@@ -665,15 +667,11 @@ cfq_choose_req(struct cfq_data *cfqd, struct request *rq1, struct request *rq2,
 	if (rq2 == NULL)
 		return rq1;
 
-	if (rq_is_sync(rq1) && !rq_is_sync(rq2))
-		return rq1;
-	else if (rq_is_sync(rq2) && !rq_is_sync(rq1))
-		return rq2;
-	if ((rq1->cmd_flags & REQ_META) && !(rq2->cmd_flags & REQ_META))
-		return rq1;
-	else if ((rq2->cmd_flags & REQ_META) &&
-		 !(rq1->cmd_flags & REQ_META))
-		return rq2;
+	if (rq_is_sync(rq1) != rq_is_sync(rq2))
+		return rq_is_sync(rq1) ? rq1 : rq2;
+
+	if ((rq1->cmd_flags ^ rq2->cmd_flags) & REQ_META)
+		return rq1->cmd_flags & REQ_META ? rq1 : rq2;
 
 	s1 = blk_rq_pos(rq1);
 	s2 = blk_rq_pos(rq2);
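The rewritten cfq_choose_req() relies on the fact that XOR-ing two flag words isolates the bits in which they differ, so "exactly one request is sync" or "exactly one request carries REQ_META" becomes a single test. The equivalence with the old two-branch form can be checked exhaustively for a one-bit mask:

#include <assert.h>
#include <stdio.h>

#define REQ_META 0x4	/* any single-bit mask works for the identity below */

int main(void)
{
	unsigned int f1, f2;

	for (f1 = 0; f1 <= 1; f1++) {
		for (f2 = 0; f2 <= 1; f2++) {
			unsigned int c1 = f1 ? REQ_META : 0;
			unsigned int c2 = f2 ? REQ_META : 0;
			int old_way = ((c1 & REQ_META) && !(c2 & REQ_META)) ||
				      ((c2 & REQ_META) && !(c1 & REQ_META));
			int new_way = ((c1 ^ c2) & REQ_META) != 0;

			assert(old_way == new_way);
		}
	}
	printf("XOR test matches the old two-branch test for all combinations\n");
	return 0;
}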
@@ -1014,28 +1012,47 @@ void cfq_update_blkio_group_weight(void *key, struct blkio_group *blkg,
 	cfqg->needs_update = true;
 }
 
-static struct cfq_group * cfq_find_alloc_cfqg(struct cfq_data *cfqd,
-		struct blkio_cgroup *blkcg, int create)
+static void cfq_init_add_cfqg_lists(struct cfq_data *cfqd,
+			struct cfq_group *cfqg, struct blkio_cgroup *blkcg)
 {
-	struct cfq_group *cfqg = NULL;
-	void *key = cfqd;
-	int i, j;
-	struct cfq_rb_root *st;
 	struct backing_dev_info *bdi = &cfqd->queue->backing_dev_info;
 	unsigned int major, minor;
 
-	cfqg = cfqg_of_blkg(blkiocg_lookup_group(blkcg, key));
-	if (cfqg && !cfqg->blkg.dev && bdi->dev && dev_name(bdi->dev)) {
+	/*
+	 * Add group onto cgroup list. It might happen that bdi->dev is
+	 * not initialized yet. Initialize this new group without major
+	 * and minor info and this info will be filled in once a new thread
+	 * comes for IO.
+	 */
+	if (bdi->dev) {
 		sscanf(dev_name(bdi->dev), "%u:%u", &major, &minor);
-		cfqg->blkg.dev = MKDEV(major, minor);
-		goto done;
-	}
-	if (cfqg || !create)
-		goto done;
+		cfq_blkiocg_add_blkio_group(blkcg, &cfqg->blkg,
+					(void *)cfqd, MKDEV(major, minor));
+	} else
+		cfq_blkiocg_add_blkio_group(blkcg, &cfqg->blkg,
+					(void *)cfqd, 0);
+
+	cfqd->nr_blkcg_linked_grps++;
+	cfqg->weight = blkcg_get_weight(blkcg, cfqg->blkg.dev);
+
+	/* Add group on cfqd list */
+	hlist_add_head(&cfqg->cfqd_node, &cfqd->cfqg_list);
+}
+
+/*
+ * Should be called from sleepable context. No request queue lock as per
+ * cpu stats are allocated dynamically and alloc_percpu needs to be called
+ * from sleepable context.
+ */
+static struct cfq_group * cfq_alloc_cfqg(struct cfq_data *cfqd)
+{
+	struct cfq_group *cfqg = NULL;
+	int i, j, ret;
+	struct cfq_rb_root *st;
 
 	cfqg = kzalloc_node(sizeof(*cfqg), GFP_ATOMIC, cfqd->queue->node);
 	if (!cfqg)
-		goto done;
+		return NULL;
 
 	for_each_cfqg_st(cfqg, i, j, st)
 		*st = CFQ_RB_ROOT;
@@ -1049,43 +1066,94 @@ static struct cfq_group * cfq_find_alloc_cfqg(struct cfq_data *cfqd,
 	 */
 	cfqg->ref = 1;
 
+	ret = blkio_alloc_blkg_stats(&cfqg->blkg);
+	if (ret) {
+		kfree(cfqg);
+		return NULL;
+	}
+
+	return cfqg;
+}
+
+static struct cfq_group *
+cfq_find_cfqg(struct cfq_data *cfqd, struct blkio_cgroup *blkcg)
+{
+	struct cfq_group *cfqg = NULL;
+	void *key = cfqd;
+	struct backing_dev_info *bdi = &cfqd->queue->backing_dev_info;
+	unsigned int major, minor;
+
 	/*
-	 * Add group onto cgroup list. It might happen that bdi->dev is
-	 * not initialized yet. Initialize this new group without major
-	 * and minor info and this info will be filled in once a new thread
-	 * comes for IO. See code above.
+	 * This is the common case when there are no blkio cgroups.
+	 * Avoid lookup in this case
 	 */
-	if (bdi->dev) {
+	if (blkcg == &blkio_root_cgroup)
+		cfqg = &cfqd->root_group;
+	else
+		cfqg = cfqg_of_blkg(blkiocg_lookup_group(blkcg, key));
+
+	if (cfqg && !cfqg->blkg.dev && bdi->dev && dev_name(bdi->dev)) {
 		sscanf(dev_name(bdi->dev), "%u:%u", &major, &minor);
-		cfq_blkiocg_add_blkio_group(blkcg, &cfqg->blkg, (void *)cfqd,
-					MKDEV(major, minor));
-	} else
-		cfq_blkiocg_add_blkio_group(blkcg, &cfqg->blkg, (void *)cfqd,
-					0);
-
-	cfqg->weight = blkcg_get_weight(blkcg, cfqg->blkg.dev);
-
-	/* Add group on cfqd list */
-	hlist_add_head(&cfqg->cfqd_node, &cfqd->cfqg_list);
-
-done:
+		cfqg->blkg.dev = MKDEV(major, minor);
+	}
+
 	return cfqg;
 }
 
 /*
- * Search for the cfq group current task belongs to. If create = 1, then also
- * create the cfq group if it does not exist. request_queue lock must be held.
+ * Search for the cfq group current task belongs to. request_queue lock must
+ * be held.
 */
-static struct cfq_group *cfq_get_cfqg(struct cfq_data *cfqd, int create)
+static struct cfq_group *cfq_get_cfqg(struct cfq_data *cfqd)
 {
 	struct blkio_cgroup *blkcg;
-	struct cfq_group *cfqg = NULL;
+	struct cfq_group *cfqg = NULL, *__cfqg = NULL;
+	struct request_queue *q = cfqd->queue;
 
 	rcu_read_lock();
 	blkcg = task_blkio_cgroup(current);
-	cfqg = cfq_find_alloc_cfqg(cfqd, blkcg, create);
-	if (!cfqg && create)
+	cfqg = cfq_find_cfqg(cfqd, blkcg);
+	if (cfqg) {
+		rcu_read_unlock();
+		return cfqg;
+	}
+
+	/*
+	 * Need to allocate a group. Allocation of group also needs allocation
+	 * of per cpu stats which in-turn takes a mutex() and can block. Hence
+	 * we need to drop rcu lock and queue_lock before we call alloc.
+	 *
+	 * Not taking any queue reference here and assuming that queue is
+	 * around by the time we return. CFQ queue allocation code does
+	 * the same. It might be racy though.
+	 */
+	rcu_read_unlock();
+	spin_unlock_irq(q->queue_lock);
+
+	cfqg = cfq_alloc_cfqg(cfqd);
+
+	spin_lock_irq(q->queue_lock);
+
+	rcu_read_lock();
+	blkcg = task_blkio_cgroup(current);
+
+	/*
+	 * If some other thread already allocated the group while we were
+	 * not holding queue lock, free up the group
+	 */
+	__cfqg = cfq_find_cfqg(cfqd, blkcg);
+
+	if (__cfqg) {
+		kfree(cfqg);
+		rcu_read_unlock();
+		return __cfqg;
+	}
+
+	if (!cfqg)
 		cfqg = &cfqd->root_group;
+
+	cfq_init_add_cfqg_lists(cfqd, cfqg, blkcg);
 	rcu_read_unlock();
 	return cfqg;
 }
@@ -1118,6 +1186,7 @@ static void cfq_put_cfqg(struct cfq_group *cfqg)
 		return;
 	for_each_cfqg_st(cfqg, i, j, st)
 		BUG_ON(!RB_EMPTY_ROOT(&st->rb));
+	free_percpu(cfqg->blkg.stats_cpu);
 	kfree(cfqg);
 }
 
@@ -1176,7 +1245,7 @@ void cfq_unlink_blkio_group(void *key, struct blkio_group *blkg)
 }
 
 #else /* GROUP_IOSCHED */
-static struct cfq_group *cfq_get_cfqg(struct cfq_data *cfqd, int create)
+static struct cfq_group *cfq_get_cfqg(struct cfq_data *cfqd)
 {
 	return &cfqd->root_group;
 }
@@ -1210,7 +1279,6 @@ static void cfq_service_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq,
 	struct cfq_rb_root *service_tree;
 	int left;
 	int new_cfqq = 1;
-	int group_changed = 0;
 
 	service_tree = service_tree_for(cfqq->cfqg, cfqq_prio(cfqq),
 						cfqq_type(cfqq));
@@ -1281,7 +1349,7 @@ static void cfq_service_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq,
 	rb_link_node(&cfqq->rb_node, parent, p);
 	rb_insert_color(&cfqq->rb_node, &service_tree->rb);
 	service_tree->count++;
-	if ((add_front || !new_cfqq) && !group_changed)
+	if (add_front || !new_cfqq)
 		return;
 	cfq_group_notify_queue_add(cfqd, cfqq->cfqg);
 }
@@ -2029,7 +2097,7 @@ cfq_prio_to_maxrq(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 
 	WARN_ON(cfqq->ioprio >= IOPRIO_BE_NR);
 
-	return 2 * (base_rq + base_rq * (CFQ_PRIO_LISTS - 1 - cfqq->ioprio));
+	return 2 * base_rq * (IOPRIO_BE_NR - cfqq->ioprio);
 }
 
 /*
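The cfq_prio_to_maxrq() change is pure algebra: 2 * (base_rq + base_rq * (CFQ_PRIO_LISTS - 1 - ioprio)) factors into 2 * base_rq * (CFQ_PRIO_LISTS - ioprio), and with CFQ_PRIO_LISTS equal to IOPRIO_BE_NR the new expression follows. A brute-force check under that assumption (both constants are taken to be 8 here):

#include <assert.h>
#include <stdio.h>

#define CFQ_PRIO_LISTS	8	/* assumption: equals IOPRIO_BE_NR in this tree */
#define IOPRIO_BE_NR	8

int main(void)
{
	unsigned int base_rq, ioprio;

	for (base_rq = 1; base_rq <= 64; base_rq++) {
		for (ioprio = 0; ioprio < IOPRIO_BE_NR; ioprio++) {
			unsigned int old_val = 2 * (base_rq + base_rq *
					(CFQ_PRIO_LISTS - 1 - ioprio));
			unsigned int new_val = 2 * base_rq *
					(IOPRIO_BE_NR - ioprio);

			assert(old_val == new_val);
		}
	}
	printf("old and new cfq_prio_to_maxrq() formulas agree\n");
	return 0;
}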
@@ -2911,7 +2979,7 @@ cfq_find_alloc_queue(struct cfq_data *cfqd, bool is_sync,
 	struct cfq_group *cfqg;
 
 retry:
-	cfqg = cfq_get_cfqg(cfqd, 1);
+	cfqg = cfq_get_cfqg(cfqd);
 	cic = cfq_cic_lookup(cfqd, ioc);
 	/* cic always exists here */
 	cfqq = cic_to_cfqq(cic, is_sync);
@@ -3815,15 +3883,11 @@ static void cfq_put_async_queues(struct cfq_data *cfqd)
 		cfq_put_queue(cfqd->async_idle_cfqq);
 }
 
-static void cfq_cfqd_free(struct rcu_head *head)
-{
-	kfree(container_of(head, struct cfq_data, rcu));
-}
-
 static void cfq_exit_queue(struct elevator_queue *e)
 {
 	struct cfq_data *cfqd = e->elevator_data;
 	struct request_queue *q = cfqd->queue;
+	bool wait = false;
 
 	cfq_shutdown_timer_wq(cfqd);
 
@@ -3842,7 +3906,13 @@ static void cfq_exit_queue(struct elevator_queue *e)
 
 	cfq_put_async_queues(cfqd);
 	cfq_release_cfq_groups(cfqd);
-	cfq_blkiocg_del_blkio_group(&cfqd->root_group.blkg);
+
+	/*
+	 * If there are groups which we could not unlink from blkcg list,
+	 * wait for a rcu period for them to be freed.
+	 */
+	if (cfqd->nr_blkcg_linked_grps)
+		wait = true;
 
 	spin_unlock_irq(q->queue_lock);
 
@@ -3852,8 +3922,25 @@ static void cfq_exit_queue(struct elevator_queue *e)
 	ida_remove(&cic_index_ida, cfqd->cic_index);
 	spin_unlock(&cic_index_lock);
 
-	/* Wait for cfqg->blkg->key accessors to exit their grace periods. */
-	call_rcu(&cfqd->rcu, cfq_cfqd_free);
+	/*
+	 * Wait for cfqg->blkg->key accessors to exit their grace periods.
+	 * Do this wait only if there are other unlinked groups out
+	 * there. This can happen if cgroup deletion path claimed the
+	 * responsibility of cleaning up a group before queue cleanup code
+	 * get to the group.
+	 *
+	 * Do not call synchronize_rcu() unconditionally as there are drivers
+	 * which create/delete request queue hundreds of times during scan/boot
+	 * and synchronize_rcu() can take significant time and slow down boot.
+	 */
+	if (wait)
+		synchronize_rcu();
+
+#ifdef CONFIG_CFQ_GROUP_IOSCHED
+	/* Free up per cpu stats for root group */
+	free_percpu(cfqd->root_group.blkg.stats_cpu);
+#endif
+	kfree(cfqd);
 }
 
 static int cfq_alloc_cic_index(void)
@@ -3886,8 +3973,12 @@ static void *cfq_init_queue(struct request_queue *q)
 		return NULL;
 
 	cfqd = kmalloc_node(sizeof(*cfqd), GFP_KERNEL | __GFP_ZERO, q->node);
-	if (!cfqd)
+	if (!cfqd) {
+		spin_lock(&cic_index_lock);
+		ida_remove(&cic_index_ida, i);
+		spin_unlock(&cic_index_lock);
 		return NULL;
+	}
 
 	/*
 	 * Don't need take queue_lock in the routine, since we are
@@ -3909,14 +4000,29 @@ static void *cfq_init_queue(struct request_queue *q)
 
 #ifdef CONFIG_CFQ_GROUP_IOSCHED
 	/*
-	 * Take a reference to root group which we never drop. This is just
-	 * to make sure that cfq_put_cfqg() does not try to kfree root group
+	 * Set root group reference to 2. One reference will be dropped when
+	 * all groups on cfqd->cfqg_list are being deleted during queue exit.
+	 * Other reference will remain there as we don't want to delete this
+	 * group as it is statically allocated and gets destroyed when
+	 * throtl_data goes away.
 	 */
-	cfqg->ref = 1;
+	cfqg->ref = 2;
+
+	if (blkio_alloc_blkg_stats(&cfqg->blkg)) {
+		kfree(cfqg);
+		kfree(cfqd);
+		return NULL;
+	}
+
 	rcu_read_lock();
+
 	cfq_blkiocg_add_blkio_group(&blkio_root_cgroup, &cfqg->blkg,
 					(void *)cfqd, 0);
 	rcu_read_unlock();
+	cfqd->nr_blkcg_linked_grps++;
+
+	/* Add group on cfqd->cfqg_list */
+	hlist_add_head(&cfqg->cfqd_node, &cfqd->cfqg_list);
 #endif
 	/*
 	 * Not strictly needed (since RB_ROOT just clears the node and we
@@ -155,13 +155,8 @@ static struct elevator_type *elevator_get(const char *name)
 
 	e = elevator_find(name);
 	if (!e) {
-		char elv[ELV_NAME_MAX + strlen("-iosched")];
-
 		spin_unlock(&elv_list_lock);
-
-		snprintf(elv, sizeof(elv), "%s-iosched", name);
-
-		request_module("%s", elv);
+		request_module("%s-iosched", name);
 		spin_lock(&elv_list_lock);
 		e = elevator_find(name);
 	}
@@ -421,8 +416,6 @@ void elv_dispatch_sort(struct request_queue *q, struct request *rq)
 	struct list_head *entry;
 	int stop_flags;
 
-	BUG_ON(rq->cmd_flags & REQ_ON_PLUG);
-
 	if (q->last_merge == rq)
 		q->last_merge = NULL;
 
@@ -661,8 +654,6 @@ void __elv_add_request(struct request_queue *q, struct request *rq, int where)
 
 	rq->q = q;
 
-	BUG_ON(rq->cmd_flags & REQ_ON_PLUG);
-
 	if (rq->cmd_flags & REQ_SOFTBARRIER) {
 		/* barriers are scheduling boundary, update end_sector */
 		if (rq->cmd_type == REQ_TYPE_FS ||
@@ -1089,21 +1089,21 @@ static int atapi_drain_needed(struct request *rq)
 static int ata_scsi_dev_config(struct scsi_device *sdev,
 			       struct ata_device *dev)
 {
+	struct request_queue *q = sdev->request_queue;
+
 	if (!ata_id_has_unload(dev->id))
 		dev->flags |= ATA_DFLAG_NO_UNLOAD;
 
 	/* configure max sectors */
-	blk_queue_max_hw_sectors(sdev->request_queue, dev->max_sectors);
+	blk_queue_max_hw_sectors(q, dev->max_sectors);
 
 	if (dev->class == ATA_DEV_ATAPI) {
-		struct request_queue *q = sdev->request_queue;
 		void *buf;
 
 		sdev->sector_size = ATA_SECT_SIZE;
 
 		/* set DMA padding */
-		blk_queue_update_dma_pad(sdev->request_queue,
-					 ATA_DMA_PAD_SZ - 1);
+		blk_queue_update_dma_pad(q, ATA_DMA_PAD_SZ - 1);
 
 		/* configure draining */
 		buf = kmalloc(ATAPI_MAX_DRAIN, q->bounce_gfp | GFP_KERNEL);
@@ -1131,8 +1131,7 @@ static int ata_scsi_dev_config(struct scsi_device *sdev,
 			"sector_size=%u > PAGE_SIZE, PIO may malfunction\n",
 			sdev->sector_size);
 
-		blk_queue_update_dma_alignment(sdev->request_queue,
-					       sdev->sector_size - 1);
+		blk_queue_update_dma_alignment(q, sdev->sector_size - 1);
 
 		if (dev->flags & ATA_DFLAG_AN)
 			set_bit(SDEV_EVT_MEDIA_CHANGE, sdev->supported_events);
@@ -1145,6 +1144,8 @@ static int ata_scsi_dev_config(struct scsi_device *sdev,
 		scsi_adjust_queue_depth(sdev, MSG_SIMPLE_TAG, depth);
 	}
 
+	blk_queue_flush_queueable(q, false);
+
 	dev->sdev = sdev;
 	return 0;
 }
@@ -320,6 +320,8 @@ static void pcd_init_units(void)
 		disk->first_minor = unit;
 		strcpy(disk->disk_name, cd->name);	/* umm... */
 		disk->fops = &pcd_bdops;
+		disk->flags = GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE;
+		disk->events = DISK_EVENT_MEDIA_CHANGE;
 	}
 }
 
@@ -625,7 +625,9 @@ static int viocd_probe(struct vio_dev *vdev, const struct vio_device_id *id)
 	blk_queue_max_hw_sectors(q, 4096 / 512);
 	gendisk->queue = q;
 	gendisk->fops = &viocd_fops;
-	gendisk->flags = GENHD_FL_CD|GENHD_FL_REMOVABLE;
+	gendisk->flags = GENHD_FL_CD | GENHD_FL_REMOVABLE |
+			 GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE;
+	gendisk->events = DISK_EVENT_MEDIA_CHANGE;
 	set_capacity(gendisk, 0);
 	gendisk->private_data = d;
 	d->viocd_disk = gendisk;
@@ -1781,7 +1781,8 @@ static int ide_cd_probe(ide_drive_t *drive)
 
 	ide_cd_read_toc(drive, &sense);
 	g->fops = &idecd_ops;
-	g->flags |= GENHD_FL_REMOVABLE;
+	g->flags |= GENHD_FL_REMOVABLE | GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE;
+	g->events = DISK_EVENT_MEDIA_CHANGE;
 	add_disk(g);
 	return 0;
 
@@ -636,7 +636,7 @@ static int sr_probe(struct device *dev)
 	disk->first_minor = minor;
 	sprintf(disk->disk_name, "sr%d", minor);
 	disk->fops = &sr_bdops;
-	disk->flags = GENHD_FL_CD;
+	disk->flags = GENHD_FL_CD | GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE;
 	disk->events = DISK_EVENT_MEDIA_CHANGE | DISK_EVENT_EJECT_REQUEST;
 
 	blk_queue_rq_timeout(sdev->request_queue, SR_TIMEOUT);
@@ -1238,6 +1238,8 @@ int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder)
 	res = __blkdev_get(bdev, mode, 0);
 
 	if (whole) {
+		struct gendisk *disk = whole->bd_disk;
+
 		/* finish claiming */
 		mutex_lock(&bdev->bd_mutex);
 		spin_lock(&bdev_lock);
@@ -1264,15 +1266,16 @@ int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder)
 		spin_unlock(&bdev_lock);
 
 		/*
-		 * Block event polling for write claims. Any write
-		 * holder makes the write_holder state stick until all
-		 * are released. This is good enough and tracking
-		 * individual writeable reference is too fragile given
-		 * the way @mode is used in blkdev_get/put().
+		 * Block event polling for write claims if requested. Any
+		 * write holder makes the write_holder state stick until
+		 * all are released. This is good enough and tracking
+		 * individual writeable reference is too fragile given the
+		 * way @mode is used in blkdev_get/put().
 		 */
-		if (!res && (mode & FMODE_WRITE) && !bdev->bd_write_holder) {
+		if ((disk->flags & GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE) &&
+		    !res && (mode & FMODE_WRITE) && !bdev->bd_write_holder) {
 			bdev->bd_write_holder = true;
-			disk_block_events(bdev->bd_disk);
+			disk_block_events(disk);
 		}
 
 		mutex_unlock(&bdev->bd_mutex);
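With GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE, suppressing media-change polling on an exclusive write open becomes opt-in per disk; only the CD/DVD drivers patched above keep the old behaviour. A toy illustration of that opt-in check (flag values are copied here for illustration, nothing reads genhd.h at run time):

#include <stdbool.h>
#include <stdio.h>

#define GENHD_FL_CD				8	/* mirrors genhd.h */
#define GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE	256

struct toy_disk {
	unsigned int flags;
	bool events_blocked;
};

/* Called on an exclusive open for write, as in blkdev_get(). */
static void exclusive_write_open(struct toy_disk *disk)
{
	if (disk->flags & GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE)
		disk->events_blocked = true;	/* e.g. burning: stop polling */
}

int main(void)
{
	struct toy_disk cdrom = { .flags = GENHD_FL_CD |
					   GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE };
	struct toy_disk hdd = { .flags = 0 };

	exclusive_write_open(&cdrom);
	exclusive_write_open(&hdd);
	printf("cdrom polling blocked: %d, hdd polling blocked: %d\n",
	       cdrom.events_blocked, hdd.events_blocked);
	return 0;
}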
@@ -255,7 +255,11 @@ ssize_t part_discard_alignment_show(struct device *dev,
 				   struct device_attribute *attr, char *buf)
 {
 	struct hd_struct *p = dev_to_part(dev);
-	return sprintf(buf, "%u\n", p->discard_alignment);
+	struct gendisk *disk = dev_to_disk(dev);
+
+	return sprintf(buf, "%u\n",
+			queue_limit_discard_alignment(&disk->queue->limits,
+							p->start_sect));
 }
 
 ssize_t part_stat_show(struct device *dev,
@@ -449,8 +453,6 @@ struct hd_struct *add_partition(struct gendisk *disk, int partno,
 	p->start_sect = start;
 	p->alignment_offset =
 		queue_limit_alignment_offset(&disk->queue->limits, start);
-	p->discard_alignment =
-		queue_limit_discard_alignment(&disk->queue->limits, start);
 	p->nr_sects = len;
 	p->partno = partno;
 	p->policy = get_disk_ro(disk);
@@ -151,7 +151,6 @@ enum rq_flag_bits {
 	__REQ_IO_STAT,		/* account I/O stat */
 	__REQ_MIXED_MERGE,	/* merge of different types, fail separately */
 	__REQ_SECURE,		/* secure discard (used with __REQ_DISCARD) */
-	__REQ_ON_PLUG,		/* on plug list */
 	__REQ_NR_BITS,		/* stops here */
 };
 
@@ -192,6 +191,5 @@ enum rq_flag_bits {
 #define REQ_IO_STAT		(1 << __REQ_IO_STAT)
 #define REQ_MIXED_MERGE		(1 << __REQ_MIXED_MERGE)
 #define REQ_SECURE		(1 << __REQ_SECURE)
-#define REQ_ON_PLUG		(1 << __REQ_ON_PLUG)
 
 #endif /* __LINUX_BLK_TYPES_H */
@@ -257,7 +257,7 @@ struct queue_limits {
 	unsigned char		misaligned;
 	unsigned char		discard_misaligned;
 	unsigned char		cluster;
-	signed char		discard_zeroes_data;
+	unsigned char		discard_zeroes_data;
 };
 
 struct request_queue
@@ -364,6 +364,8 @@ struct request_queue
 	 * for flush operations
 	 */
 	unsigned int		flush_flags;
+	unsigned int		flush_not_queueable:1;
+	unsigned int		flush_queue_delayed:1;
 	unsigned int		flush_pending_idx:1;
 	unsigned int		flush_running_idx:1;
 	unsigned long		flush_pending_since;
@@ -843,6 +845,7 @@ extern void blk_queue_softirq_done(struct request_queue *, softirq_done_fn *);
 extern void blk_queue_rq_timed_out(struct request_queue *, rq_timed_out_fn *);
 extern void blk_queue_rq_timeout(struct request_queue *, unsigned int);
 extern void blk_queue_flush(struct request_queue *q, unsigned int flush);
+extern void blk_queue_flush_queueable(struct request_queue *q, bool queueable);
 extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev);
 
 extern int blk_rq_map_sg(struct request_queue *, struct request *, struct scatterlist *);
@@ -1066,13 +1069,16 @@ static inline int queue_limit_discard_alignment(struct queue_limits *lim, sector
 {
 	unsigned int alignment = (sector << 9) & (lim->discard_granularity - 1);
 
+	if (!lim->max_discard_sectors)
+		return 0;
+
 	return (lim->discard_granularity + lim->discard_alignment - alignment)
 		& (lim->discard_granularity - 1);
 }
 
 static inline unsigned int queue_discard_zeroes_data(struct request_queue *q)
 {
-	if (q->limits.discard_zeroes_data == 1)
+	if (q->limits.max_discard_sectors && q->limits.discard_zeroes_data == 1)
 		return 1;
 
 	return 0;
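queue_limit_discard_alignment() now reports 0 for devices that do not support discard at all (max_discard_sectors == 0) instead of deriving an offset from limits that were never filled in. The arithmetic itself is unchanged and assumes a power-of-two granularity; a worked example with hypothetical limits:

#include <stdio.h>

typedef unsigned long long sector_t;

struct limits {
	unsigned int discard_granularity;	/* bytes, power of two assumed */
	unsigned int discard_alignment;		/* bytes */
	unsigned int max_discard_sectors;
};

/* Mirrors the patched helper: report 0 when the device does not discard at all. */
static int discard_alignment_bytes(const struct limits *lim, sector_t sector)
{
	unsigned int alignment = (sector << 9) & (lim->discard_granularity - 1);

	if (!lim->max_discard_sectors)
		return 0;

	return (lim->discard_granularity + lim->discard_alignment - alignment)
		& (lim->discard_granularity - 1);
}

int main(void)
{
	/* Hypothetical device: 64 KiB discard granule, discard supported. */
	struct limits lim = { .discard_granularity = 65536,
			      .discard_alignment = 0,
			      .max_discard_sectors = 8192 };

	/* Legacy partition starting at sector 63 (byte offset 32256). */
	printf("bytes to next granule from sector 63: %d\n",
	       discard_alignment_bytes(&lim, 63));

	lim.max_discard_sectors = 0;	/* device without discard support */
	printf("with discard disabled: %d\n", discard_alignment_bytes(&lim, 63));
	return 0;
}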
@@ -1111,6 +1117,11 @@ static inline unsigned int block_size(struct block_device *bdev)
 	return bdev->bd_block_size;
 }
 
+static inline bool queue_flush_queueable(struct request_queue *q)
+{
+	return !q->flush_not_queueable;
+}
+
 typedef struct {struct page *v;} Sector;
 
 unsigned char *read_dev_sector(struct block_device *, sector_t, Sector *);
@@ -100,7 +100,6 @@ struct hd_struct {
 	sector_t start_sect;
 	sector_t nr_sects;
 	sector_t alignment_offset;
-	unsigned int discard_alignment;
 	struct device __dev;
 	struct kobject *holder_dir;
 	int policy, partno;
@@ -127,6 +126,7 @@ struct hd_struct {
 #define GENHD_FL_SUPPRESS_PARTITION_INFO	32
 #define GENHD_FL_EXT_DEVT			64 /* allow extended devt */
 #define GENHD_FL_NATIVE_CAPACITY		128
+#define GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE	256
 
 enum {
 	DISK_EVENT_MEDIA_CHANGE			= 1 << 0, /* media changed */
@@ -63,10 +63,10 @@ static int bdi_debug_stats_show(struct seq_file *m, void *v)
 	unsigned long background_thresh;
 	unsigned long dirty_thresh;
 	unsigned long bdi_thresh;
-	unsigned long nr_dirty, nr_io, nr_more_io, nr_wb;
+	unsigned long nr_dirty, nr_io, nr_more_io;
 	struct inode *inode;
 
-	nr_wb = nr_dirty = nr_io = nr_more_io = 0;
+	nr_dirty = nr_io = nr_more_io = 0;
 	spin_lock(&inode_wb_list_lock);
 	list_for_each_entry(inode, &wb->b_dirty, i_wb_list)
 		nr_dirty++;