dmaengine: centralize channel allocation, introduce dma_find_channel
Allowing multiple clients to each define their own channel allocation
scheme quickly leads to a pathological situation. For memory-to-memory
offload all clients can share a central allocator.

This simply moves the existing async_tx allocator to dmaengine with
minimal fixups:

* async_tx.c:get_chan_ref_by_cap --> dmaengine.c:nth_chan
* async_tx.c:async_tx_rebalance --> dmaengine.c:dma_channel_rebalance
* split out common code from async_tx.c:__async_tx_find_channel -->
  dma_find_channel

Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
parent 6f49a57aa5
commit bec085134e
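For context, a minimal sketch of how a mem-to-mem client might consume the centralized allocator this patch introduces. This is a sketch only: sketch_copy is a hypothetical helper, dma_find_channel and dma_async_memcpy_buf_to_buf are the dmaengine calls assumed available in this era, and the caller is assumed to already hold a dmaengine client reference (see the WARN_ONCE in dma_find_channel below).

#include <linux/dmaengine.h>
#include <linux/errno.h>

/* hypothetical helper: offload a memcpy via the shared allocator */
static dma_cookie_t sketch_copy(void *dest, void *src, size_t len)
{
        /* per-cpu, lockless lookup in the new channel_table */
        struct dma_chan *chan = dma_find_channel(DMA_MEMCPY);

        if (!chan)
                return -ENODEV; /* no mem-to-mem provider registered */

        /* existing dmaengine helper that issues the copy on @chan */
        return dma_async_memcpy_buf_to_buf(chan, dest, src, len);
}

The point of the change is visible here: the client performs a cheap per-cpu table read instead of implementing its own allocation scheme.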
crypto/async_tx/async_tx.c
@@ -37,26 +37,11 @@ static struct dma_client async_tx_dma = {
         /* .cap_mask == 0 defaults to all channels */
 };
 
-/**
- * dma_cap_mask_all - enable iteration over all operation types
- */
-static dma_cap_mask_t dma_cap_mask_all;
-
-/**
- * chan_ref_percpu - tracks channel allocations per core/opertion
- */
-struct chan_ref_percpu {
-        struct dma_chan_ref *ref;
-};
-
-static int channel_table_initialized;
-static struct chan_ref_percpu *channel_table[DMA_TX_TYPE_END];
-
 /**
  * async_tx_lock - protect modification of async_tx_master_list and serialize
  * rebalance operations
  */
-static spinlock_t async_tx_lock;
+static DEFINE_SPINLOCK(async_tx_lock);
 
 static LIST_HEAD(async_tx_master_list);
 
@@ -89,85 +74,6 @@ init_dma_chan_ref(struct dma_chan_ref *ref, struct dma_chan *chan)
         atomic_set(&ref->count, 0);
 }
 
-/**
- * get_chan_ref_by_cap - returns the nth channel of the given capability
- * defaults to returning the channel with the desired capability and the
- * lowest reference count if the index can not be satisfied
- * @cap: capability to match
- * @index: nth channel desired, passing -1 has the effect of forcing the
- * default return value
- */
-static struct dma_chan_ref *
-get_chan_ref_by_cap(enum dma_transaction_type cap, int index)
-{
-        struct dma_chan_ref *ret_ref = NULL, *min_ref = NULL, *ref;
-
-        rcu_read_lock();
-        list_for_each_entry_rcu(ref, &async_tx_master_list, node)
-                if (dma_has_cap(cap, ref->chan->device->cap_mask)) {
-                        if (!min_ref)
-                                min_ref = ref;
-                        else if (atomic_read(&ref->count) <
-                                atomic_read(&min_ref->count))
-                                min_ref = ref;
-
-                        if (index-- == 0) {
-                                ret_ref = ref;
-                                break;
-                        }
-                }
-        rcu_read_unlock();
-
-        if (!ret_ref)
-                ret_ref = min_ref;
-
-        if (ret_ref)
-                atomic_inc(&ret_ref->count);
-
-        return ret_ref;
-}
-
-/**
- * async_tx_rebalance - redistribute the available channels, optimize
- * for cpu isolation in the SMP case, and opertaion isolation in the
- * uniprocessor case
- */
-static void async_tx_rebalance(void)
-{
-        int cpu, cap, cpu_idx = 0;
-        unsigned long flags;
-
-        if (!channel_table_initialized)
-                return;
-
-        spin_lock_irqsave(&async_tx_lock, flags);
-
-        /* undo the last distribution */
-        for_each_dma_cap_mask(cap, dma_cap_mask_all)
-                for_each_possible_cpu(cpu) {
-                        struct dma_chan_ref *ref =
-                                per_cpu_ptr(channel_table[cap], cpu)->ref;
-                        if (ref) {
-                                atomic_set(&ref->count, 0);
-                                per_cpu_ptr(channel_table[cap], cpu)->ref =
-                                        NULL;
-                        }
-                }
-
-        for_each_dma_cap_mask(cap, dma_cap_mask_all)
-                for_each_online_cpu(cpu) {
-                        struct dma_chan_ref *new;
-                        if (NR_CPUS > 1)
-                                new = get_chan_ref_by_cap(cap, cpu_idx++);
-                        else
-                                new = get_chan_ref_by_cap(cap, -1);
-
-                        per_cpu_ptr(channel_table[cap], cpu)->ref = new;
-                }
-
-        spin_unlock_irqrestore(&async_tx_lock, flags);
-}
-
 static enum dma_state_client
 dma_channel_add_remove(struct dma_client *client,
                        struct dma_chan *chan, enum dma_state state)
@@ -211,8 +117,6 @@ dma_channel_add_remove(struct dma_client *client,
                                " (-ENOMEM)\n");
                         return 0;
                 }
-
-                async_tx_rebalance();
                 break;
         case DMA_RESOURCE_REMOVED:
                 found = 0;
@@ -233,8 +137,6 @@ dma_channel_add_remove(struct dma_client *client,
                         ack = DMA_ACK;
                 else
                         break;
-
-                async_tx_rebalance();
                 break;
         case DMA_RESOURCE_SUSPEND:
         case DMA_RESOURCE_RESUME:
@@ -248,51 +150,18 @@ dma_channel_add_remove(struct dma_client *client,
         return ack;
 }
 
-static int __init
-async_tx_init(void)
+static int __init async_tx_init(void)
 {
-        enum dma_transaction_type cap;
-
-        spin_lock_init(&async_tx_lock);
-        bitmap_fill(dma_cap_mask_all.bits, DMA_TX_TYPE_END);
-
-        /* an interrupt will never be an explicit operation type.
-         * clearing this bit prevents allocation to a slot in 'channel_table'
-         */
-        clear_bit(DMA_INTERRUPT, dma_cap_mask_all.bits);
-
-        for_each_dma_cap_mask(cap, dma_cap_mask_all) {
-                channel_table[cap] = alloc_percpu(struct chan_ref_percpu);
-                if (!channel_table[cap])
-                        goto err;
-        }
-
-        channel_table_initialized = 1;
         dma_async_client_register(&async_tx_dma);
         dma_async_client_chan_request(&async_tx_dma);
 
         printk(KERN_INFO "async_tx: api initialized (async)\n");
 
         return 0;
-err:
-        printk(KERN_ERR "async_tx: initialization failure\n");
-
-        while (--cap >= 0)
-                free_percpu(channel_table[cap]);
-
-        return 1;
 }
 
 static void __exit async_tx_exit(void)
 {
-        enum dma_transaction_type cap;
-
-        channel_table_initialized = 0;
-
-        for_each_dma_cap_mask(cap, dma_cap_mask_all)
-                if (channel_table[cap])
-                        free_percpu(channel_table[cap]);
-
         dma_async_client_unregister(&async_tx_dma);
 }
 
@@ -308,16 +177,9 @@ __async_tx_find_channel(struct dma_async_tx_descriptor *depend_tx,
 {
         /* see if we can keep the chain on one channel */
         if (depend_tx &&
             dma_has_cap(tx_type, depend_tx->chan->device->cap_mask))
                 return depend_tx->chan;
-        else if (likely(channel_table_initialized)) {
-                struct dma_chan_ref *ref;
-                int cpu = get_cpu();
-                ref = per_cpu_ptr(channel_table[tx_type], cpu)->ref;
-                put_cpu();
-                return ref ? ref->chan : NULL;
-        } else
-                return NULL;
+        return dma_find_channel(tx_type);
 }
 EXPORT_SYMBOL_GPL(__async_tx_find_channel);
 #else
drivers/dma/dmaengine.c
@@ -294,6 +294,164 @@ static void dma_chan_release(struct dma_chan *chan)
         call_rcu(&chan->rcu, dma_chan_free_rcu);
 }
 
+/**
+ * dma_cap_mask_all - enable iteration over all operation types
+ */
+static dma_cap_mask_t dma_cap_mask_all;
+
+/**
+ * dma_chan_tbl_ent - tracks channel allocations per core/operation
+ * @chan - associated channel for this entry
+ */
+struct dma_chan_tbl_ent {
+        struct dma_chan *chan;
+};
+
+/**
+ * channel_table - percpu lookup table for memory-to-memory offload providers
+ */
+static struct dma_chan_tbl_ent *channel_table[DMA_TX_TYPE_END];
+
+static int __init dma_channel_table_init(void)
+{
+        enum dma_transaction_type cap;
+        int err = 0;
+
+        bitmap_fill(dma_cap_mask_all.bits, DMA_TX_TYPE_END);
+
+        /* 'interrupt' and 'slave' are channel capabilities, but are not
+         * associated with an operation so they do not need an entry in the
+         * channel_table
+         */
+        clear_bit(DMA_INTERRUPT, dma_cap_mask_all.bits);
+        clear_bit(DMA_SLAVE, dma_cap_mask_all.bits);
+
+        for_each_dma_cap_mask(cap, dma_cap_mask_all) {
+                channel_table[cap] = alloc_percpu(struct dma_chan_tbl_ent);
+                if (!channel_table[cap]) {
+                        err = -ENOMEM;
+                        break;
+                }
+        }
+
+        if (err) {
+                pr_err("dmaengine: initialization failure\n");
+                for_each_dma_cap_mask(cap, dma_cap_mask_all)
+                        if (channel_table[cap])
+                                free_percpu(channel_table[cap]);
+        }
+
+        return err;
+}
+subsys_initcall(dma_channel_table_init);
+
+/**
+ * dma_find_channel - find a channel to carry out the operation
+ * @tx_type: transaction type
+ */
+struct dma_chan *dma_find_channel(enum dma_transaction_type tx_type)
+{
+        struct dma_chan *chan;
+        int cpu;
+
+        WARN_ONCE(dmaengine_ref_count == 0,
+                  "client called %s without a reference", __func__);
+
+        cpu = get_cpu();
+        chan = per_cpu_ptr(channel_table[tx_type], cpu)->chan;
+        put_cpu();
+
+        return chan;
+}
+EXPORT_SYMBOL(dma_find_channel);
+
+/**
+ * nth_chan - returns the nth channel of the given capability
+ * @cap: capability to match
+ * @n: nth channel desired
+ *
+ * Defaults to returning the channel with the desired capability and the
+ * lowest reference count when 'n' cannot be satisfied.  Must be called
+ * under dma_list_mutex.
+ */
+static struct dma_chan *nth_chan(enum dma_transaction_type cap, int n)
+{
+        struct dma_device *device;
+        struct dma_chan *chan;
+        struct dma_chan *ret = NULL;
+        struct dma_chan *min = NULL;
+
+        list_for_each_entry(device, &dma_device_list, global_node) {
+                if (!dma_has_cap(cap, device->cap_mask))
+                        continue;
+                list_for_each_entry(chan, &device->channels, device_node) {
+                        if (!chan->client_count)
+                                continue;
+                        if (!min)
+                                min = chan;
+                        else if (chan->table_count < min->table_count)
+                                min = chan;
+
+                        if (n-- == 0) {
+                                ret = chan;
+                                break; /* done */
+                        }
+                }
+                if (ret)
+                        break; /* done */
+        }
+
+        if (!ret)
+                ret = min;
+
+        if (ret)
+                ret->table_count++;
+
+        return ret;
+}
+
+/**
+ * dma_channel_rebalance - redistribute the available channels
+ *
+ * Optimize for cpu isolation (each cpu gets a dedicated channel for an
+ * operation type) in the SMP case, and operation isolation (avoid
+ * multi-tasking channels) in the non-SMP case.  Must be called under
+ * dma_list_mutex.
+ */
+static void dma_channel_rebalance(void)
+{
+        struct dma_chan *chan;
+        struct dma_device *device;
+        int cpu;
+        int cap;
+        int n;
+
+        /* undo the last distribution */
+        for_each_dma_cap_mask(cap, dma_cap_mask_all)
+                for_each_possible_cpu(cpu)
+                        per_cpu_ptr(channel_table[cap], cpu)->chan = NULL;
+
+        list_for_each_entry(device, &dma_device_list, global_node)
+                list_for_each_entry(chan, &device->channels, device_node)
+                        chan->table_count = 0;
+
+        /* don't populate the channel_table if no clients are available */
+        if (!dmaengine_ref_count)
+                return;
+
+        /* redistribute available channels */
+        n = 0;
+        for_each_dma_cap_mask(cap, dma_cap_mask_all)
+                for_each_online_cpu(cpu) {
+                        if (num_possible_cpus() > 1)
+                                chan = nth_chan(cap, n++);
+                        else
+                                chan = nth_chan(cap, -1);
+
+                        per_cpu_ptr(channel_table[cap], cpu)->chan = chan;
+                }
+}
+
 /**
  * dma_chans_notify_available - broadcast available channels to the clients
  */
@@ -339,7 +497,12 @@ void dma_async_client_register(struct dma_client *client)
                                 dev_name(&chan->dev), err);
                 }
 
+        /* if this is the first reference and there were channels
+         * waiting we need to rebalance to get those channels
+         * incorporated into the channel table
+         */
+        if (dmaengine_ref_count == 1)
+                dma_channel_rebalance();
         list_add_tail(&client->global_node, &dma_client_list);
         mutex_unlock(&dma_list_mutex);
 }
@@ -473,6 +636,7 @@ int dma_async_device_register(struct dma_device *device)
                 }
         }
         list_add_tail(&device->global_node, &dma_device_list);
+        dma_channel_rebalance();
         mutex_unlock(&dma_list_mutex);
 
         dma_clients_notify_available();
@@ -514,6 +678,7 @@ void dma_async_device_unregister(struct dma_device *device)
 
         mutex_lock(&dma_list_mutex);
         list_del(&device->global_node);
+        dma_channel_rebalance();
         mutex_unlock(&dma_list_mutex);
 
         list_for_each_entry(chan, &device->channels, device_node) {
@@ -768,3 +933,4 @@ static int __init dma_bus_init(void)
 }
 subsys_initcall(dma_bus_init);
 
+
include/linux/dmaengine.h
@@ -182,6 +182,7 @@ struct dma_chan_percpu {
  * @device_node: used to add this to the device chan list
  * @local: per-cpu pointer to a struct dma_chan_percpu
  * @client-count: how many clients are using this channel
+ * @table_count: number of appearances in the mem-to-mem allocation table
  */
 struct dma_chan {
         struct dma_device *device;
@@ -198,6 +199,7 @@ struct dma_chan {
         struct list_head device_node;
         struct dma_chan_percpu *local;
         int client_count;
+        int table_count;
 };
 
 #define to_dma_chan(p) container_of(p, struct dma_chan, dev)
@@ -468,6 +470,7 @@ static inline enum dma_status dma_wait_for_async_tx(struct dma_async_tx_descript
 int dma_async_device_register(struct dma_device *device);
 void dma_async_device_unregister(struct dma_device *device);
 void dma_run_dependencies(struct dma_async_tx_descriptor *tx);
+struct dma_chan *dma_find_channel(enum dma_transaction_type tx_type);
 
 /* --- Helper iov-locking functions --- */
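As a rough illustration of the policy dma_channel_rebalance implements, the standalone sketch below (plain C, not kernel code; all names are invented for the example) distributes channels to CPUs round-robin per capability. When channels are outnumbered by CPUs, nth_chan falls back to the channel with the lowest table_count, which for identically capable channels degenerates to the modulo assignment used here.

#include <stdio.h>

#define NCPUS  4        /* stand-in for the online cpu mask */
#define NCHANS 2        /* channels advertising one capability */

int main(void)
{
        int table[NCPUS];       /* stand-in for channel_table[cap] */
        int n = 0;

        for (int cpu = 0; cpu < NCPUS; cpu++) {
                /* mirrors nth_chan(cap, n++): once n exceeds the
                 * channel count, the least-referenced channel is
                 * chosen, which with equal reference counts is
                 * simply round-robin
                 */
                table[cpu] = n++ % NCHANS;
                printf("cpu%d -> chan%d\n", cpu, table[cpu]);
        }
        return 0;
}

The SMP branch (num_possible_cpus() > 1) thus spreads operation types across channels for cpu isolation, while the uniprocessor branch (n == -1) always takes the least-loaded channel, isolating operation types from each other instead.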