#include <linux/seq_file.h>
#include <linux/proc_fs.h>
#include <linux/cgroup.h>
#include <linux/module.h>
#include <linux/psi.h>
#include <linux/rqm.h>
#include <linux/memcontrol.h>
#include <linux/sched.h>
#include <linux/sched/task_stack.h>
#include <linux/sched/sysctl.h>
#include <linux/stacktrace.h>
#include <asm/irq_regs.h>
#include "../sched/sched.h"
#include <linux/sli.h>
#include <linux/rculist.h>

#define MAX_STACK_TRACE_DEPTH 64

static DEFINE_STATIC_KEY_FALSE(sli_enabled);
static DEFINE_STATIC_KEY_FALSE(sli_monitor_enabled);

static struct sli_event_monitor default_sli_event_monitor;
static struct workqueue_struct *sli_workqueue;

struct sli_event_control {
	int event_type;
	int event_id;
	int period;
	int mbuf_enable;
	unsigned long long count;
	unsigned long long threshold;

	KABI_RESERVE(1);
	KABI_RESERVE(2);
};

static const char *schedlat_theshold_name[] = {
	"schedlat_wait_threshold=",
	"schedlat_block_threshold=",
	"schedlat_ioblock_threshold=",
	"schedlat_sleep_threshold=",
	"schedlat_longsys_threshold=",
	"schedlat_rundelay_threshold=",
	"schedlat_irqtime_threshold="
};

static const char *memlat_threshold_name[] = {
	"memlat_global_direct_reclaim_threshold=",
	"memlat_memcg_direct_reclaim_threshold=",
	"memlat_direct_compact_threshold=",
	"memlat_global_direct_swapout_threshold=",
	"memlat_memcg_direct_swapout_threshold=",
	"memlat_direct_swapin_threshold="
};

static const char *longterm_threshold_name[] = {
	"longterm_rundelay_threshold=",
	"longterm_irqtime_threshold="
};

static const char *sanity_check_abbr[] = {
	"schedlat_",
	"memlat_",
	"longterm_",
	"period=",
	"mbuf_enable="
};

static void sli_proactive_monitor_work(struct work_struct *work);
static unsigned long sli_get_longterm_statistics(struct cgroup *cgrp,
						 enum sli_longterm_event event_id);
/*
 * Convert ULLONG_MAX to zero when showing values to userspace, and convert
 * zero to ULLONG_MAX when a value is written to the control interface.
 */
static inline u64 sli_convert_value(u64 value, bool control_show)
{
	if (control_show && value == ULLONG_MAX)
		return 0;

	if (!control_show && value == 0)
		value = ULLONG_MAX;

	return value;
}

static void sli_event_monitor_init(struct sli_event_monitor *event_monitor, struct cgroup *cgrp)
{
	INIT_LIST_HEAD_RCU(&event_monitor->event_head);
	INIT_WORK(&event_monitor->sli_event_work, sli_proactive_monitor_work);

	memset(&event_monitor->schedlat_threshold, 0xff, sizeof(event_monitor->schedlat_threshold));
	memset(&event_monitor->schedlat_count, 0xff, sizeof(event_monitor->schedlat_count));
	memset(&event_monitor->memlat_threshold, 0xff, sizeof(event_monitor->memlat_threshold));
	memset(&event_monitor->memlat_count, 0xff, sizeof(event_monitor->memlat_count));
	memset(&event_monitor->longterm_threshold, 0xff, sizeof(event_monitor->longterm_threshold));

	event_monitor->last_update = jiffies;
	event_monitor->cgrp = cgrp;
}

/* Inherit monitoring events from the parent cgroup or the global sli_event_monitor */
static int sli_event_inherit(struct cgroup *cgrp)
{
	struct sli_event *event, *event_tmp;
	struct sli_event_monitor *event_monitor;
	struct sli_event_monitor *cgrp_event_monitor = cgrp->cgrp_event_monitor;

	if (cgroup_parent(cgrp)->cgrp_event_monitor)
		event_monitor = cgroup_parent(cgrp)->cgrp_event_monitor;
	else
		event_monitor = &default_sli_event_monitor;

	rcu_read_lock();
	list_for_each_entry_rcu(event, &event_monitor->event_head, event_node) {
		struct sli_event *new_event;

		new_event = kmalloc(sizeof(struct sli_event), GFP_ATOMIC);
		if (!new_event)
			goto failed;

		/*
		 * The event_type and event_id should not be observed before the
		 * sli_event has been added to the list.  The writer guarantees the
		 * store order with smp_wmb(), and readers observe the same order on
		 * mainstream architectures (such as x86 and arm).  Some architectures
		 * (such as DEC Alpha) can break the data dependency, so READ_ONCE()
		 * is used here to preserve the data dependency even on DEC Alpha.
		 */
		new_event->event_type = READ_ONCE(event->event_type);
		new_event->event_id = READ_ONCE(event->event_id);

		switch (new_event->event_type) {
		case SLI_SCHED_EVENT:
			cgrp_event_monitor->schedlat_threshold[new_event->event_id] =
				READ_ONCE(event_monitor->schedlat_threshold[new_event->event_id]);
			cgrp_event_monitor->schedlat_count[new_event->event_id] =
				READ_ONCE(event_monitor->schedlat_count[new_event->event_id]);
			break;
		case SLI_MEM_EVENT:
			cgrp_event_monitor->memlat_threshold[new_event->event_id] =
				READ_ONCE(event_monitor->memlat_threshold[new_event->event_id]);
			cgrp_event_monitor->memlat_count[new_event->event_id] =
				READ_ONCE(event_monitor->memlat_count[new_event->event_id]);
			break;
		case SLI_LONGTERM_EVENT:
			cgrp_event_monitor->longterm_threshold[new_event->event_id] =
				READ_ONCE(event_monitor->longterm_threshold[new_event->event_id]);
			atomic_long_set(
				&cgrp_event_monitor->longterm_statistics[new_event->event_id],
				sli_get_longterm_statistics(cgrp, new_event->event_id));
			break;
		default:
			printk(KERN_ERR "%s: invalid sli_event type!\n", __func__);
			kfree(new_event);
			goto failed;
		}

		list_add(&new_event->event_node, &cgrp_event_monitor->event_head);
	}
	rcu_read_unlock();

	cgrp_event_monitor->period = READ_ONCE(event_monitor->period);
	cgrp_event_monitor->mbuf_enable = READ_ONCE(event_monitor->mbuf_enable);

	return 0;

failed:
	rcu_read_unlock();

	/* Free memory from the event list */
	list_for_each_entry_safe(event, event_tmp,
				 &cgrp_event_monitor->event_head, event_node) {
		list_del(&event->event_node);
		kfree(event);
	}

	return -1;
}
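
/*
 * Record why @task was considered slow: save the trigger @reason, the
 * measured @duration and the task's kernel stack trace into the cgroup's
 * mbuf log, skipping the first @skipnr stack entries.
 */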
static void store_task_stack(struct task_struct *task, char *reason,
			     u64 duration, unsigned int skipnr)
{
	unsigned long *entries;
	unsigned int nr_entries = 0;
	unsigned long flags;
	int i;
	struct cgroup *cgrp;

	entries = kmalloc_array(MAX_STACK_TRACE_DEPTH, sizeof(*entries),
				GFP_ATOMIC);
	if (!entries)
		return;

	nr_entries = stack_trace_save_tsk(task, entries, MAX_STACK_TRACE_DEPTH, skipnr);

	cgrp = get_cgroup_from_task(task);
	spin_lock_irqsave(&cgrp->cgrp_mbuf_lock, flags);

	mbuf_print(cgrp, "record reason:%s comm:%s pid:%d duration=%lld\n",
		   reason, task->comm, task->pid, duration);

	for (i = 0; i < nr_entries; i++)
		mbuf_print(cgrp, "[<0>] %pB\n", (void *)entries[i]);

	spin_unlock_irqrestore(&cgrp->cgrp_mbuf_lock, flags);

	kfree(entries);
	return;
}

static char *get_memlat_name(enum sli_memlat_stat_item sidx)
{
	char *name = NULL;

	switch (sidx) {
	case MEM_LAT_GLOBAL_DIRECT_RECLAIM:
		name = "memlat_global_direct_reclaim";
		break;
	case MEM_LAT_MEMCG_DIRECT_RECLAIM:
		name = "memlat_memcg_direct_reclaim";
		break;
	case MEM_LAT_DIRECT_COMPACT:
		name = "memlat_direct_compact";
		break;
	case MEM_LAT_GLOBAL_DIRECT_SWAPOUT:
		name = "memlat_global_direct_swapout";
		break;
	case MEM_LAT_MEMCG_DIRECT_SWAPOUT:
		name = "memlat_memcg_direct_swapout";
		break;
	case MEM_LAT_DIRECT_SWAPIN:
		name = "memlat_direct_swapin";
		break;
	default:
		break;
	}

	return name;
}
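
/*
 * Map a latency measured in nanoseconds into one of the histogram buckets.
 * The >> 20 shift divides by 2^20 (1048576), so the comparison is done in
 * units of roughly one millisecond.
 */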
static enum sli_lat_count get_lat_count_idx(u64 duration)
{
	enum sli_lat_count idx;

	duration = duration >> 20;
	if (duration < 1)
		idx = LAT_0_1;
	else if (duration < 4)
		idx = LAT_1_4;
	else if (duration < 8)
		idx = LAT_4_8;
	else if (duration < 16)
		idx = LAT_8_16;
	else if (duration < 32)
		idx = LAT_16_32;
	else if (duration < 64)
		idx = LAT_32_64;
	else if (duration < 128)
		idx = LAT_64_128;
	else
		idx = LAT_128_INF;

	return idx;
}

static char *get_schedlat_name(enum sli_schedlat_stat_item sidx)
{
	char *name = NULL;

	switch (sidx) {
	case SCHEDLAT_WAIT:
		name = "schedlat_wait";
		break;
	case SCHEDLAT_BLOCK:
		name = "schedlat_block";
		break;
	case SCHEDLAT_IOBLOCK:
		name = "schedlat_ioblock";
		break;
	case SCHEDLAT_SLEEP:
		name = "schedlat_sleep";
		break;
	case SCHEDLAT_RUNDELAY:
		name = "schedlat_rundelay";
		break;
	case SCHEDLAT_LONGSYS:
		name = "schedlat_longsys";
		break;
	case SCHEDLAT_IRQTIME:
		name = "schedlat_irqtime";
		break;
	default:
		break;
	}

	return name;
}

static char *get_longterm_name(enum sli_longterm_event sidx)
{
	char *name = NULL;

	switch (sidx) {
	case SLI_LONGTERM_RUNDELAY:
		name = "longterm_rundelay";
		break;
	case SLI_LONGTERM_IRQTIME:
		name = "longterm_irqtime";
		break;
	default:
		break;
	}

	return name;
}

static u64 sli_memlat_stat_gather(struct cgroup *cgrp,
				  enum sli_memlat_stat_item sidx,
				  enum sli_lat_count cidx)
{
	u64 sum = 0;
	int cpu;

	for_each_possible_cpu(cpu)
		sum += per_cpu_ptr(cgrp->sli_memlat_stat_percpu, cpu)->item[sidx][cidx];

	return sum;
}

int sli_memlat_stat_show(struct seq_file *m, struct cgroup *cgrp)
{
	enum sli_memlat_stat_item sidx;

	if (!static_branch_likely(&sli_enabled)) {
		seq_printf(m, "sli is not enabled, please echo 1 > /proc/sli/sli_enabled\n");
		return 0;
	}

	if (!cgrp->sli_memlat_stat_percpu)
		return 0;

	for (sidx = MEM_LAT_GLOBAL_DIRECT_RECLAIM; sidx < MEM_LAT_STAT_NR; sidx++) {
		seq_printf(m, "%s:\n", get_memlat_name(sidx));
		seq_printf(m, "0-1ms: %llu\n", sli_memlat_stat_gather(cgrp, sidx, LAT_0_1));
		seq_printf(m, "1-4ms: %llu\n", sli_memlat_stat_gather(cgrp, sidx, LAT_1_4));
		seq_printf(m, "4-8ms: %llu\n", sli_memlat_stat_gather(cgrp, sidx, LAT_4_8));
		seq_printf(m, "8-16ms: %llu\n", sli_memlat_stat_gather(cgrp, sidx, LAT_8_16));
		seq_printf(m, "16-32ms: %llu\n", sli_memlat_stat_gather(cgrp, sidx, LAT_16_32));
		seq_printf(m, "32-64ms: %llu\n", sli_memlat_stat_gather(cgrp, sidx, LAT_32_64));
		seq_printf(m, "64-128ms: %llu\n", sli_memlat_stat_gather(cgrp, sidx, LAT_64_128));
		seq_printf(m, ">=128ms: %llu\n", sli_memlat_stat_gather(cgrp, sidx, LAT_128_INF));
	}

	return 0;
}

int sli_memlat_max_show(struct seq_file *m, struct cgroup *cgrp)
{
	enum sli_memlat_stat_item sidx;

	if (!static_branch_likely(&sli_enabled)) {
		seq_printf(m, "sli is not enabled, please echo 1 > /proc/sli/sli_enabled\n");
		return 0;
	}

	if (!cgrp->sli_memlat_stat_percpu)
		return 0;

	for (sidx = MEM_LAT_GLOBAL_DIRECT_RECLAIM; sidx < MEM_LAT_STAT_NR; sidx++) {
		int cpu;
		unsigned long latency_sum = 0;

		for_each_possible_cpu(cpu)
			latency_sum += per_cpu_ptr(cgrp->sli_memlat_stat_percpu, cpu)->latency_max[sidx];

		seq_printf(m, "%s: %lu\n", get_memlat_name(sidx), latency_sum);
	}

	return 0;
}
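
/*
 * Memory latency accounting: callers bracket a slow path with
 * sli_memlat_stat_start()/sli_memlat_stat_end().  start holds the
 * local_clock() timestamp (0 when sli is disabled); end computes the
 * duration, adds it to the per-cpu histogram of the current task's cgroup
 * and, when proactive monitoring is enabled, checks it against the
 * configured threshold and optionally logs the stack to mbuf.
 */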
void sli_memlat_stat_start(u64 *start)
{
	if (!static_branch_likely(&sli_enabled))
		*start = 0;
	else
		*start = local_clock();
}

void sli_memlat_stat_end(enum sli_memlat_stat_item sidx, u64 start)
{
	struct mem_cgroup *memcg;
	struct cgroup *cgrp;

	if (!static_branch_likely(&sli_enabled) || start == 0)
		return;

	rcu_read_lock();
	memcg = mem_cgroup_from_task(current);
	if (!memcg || memcg == root_mem_cgroup)
		goto out;

	cgrp = memcg->css.cgroup;
	if (cgrp && cgroup_parent(cgrp)) {
		enum sli_lat_count cidx;
		u64 duration;

		duration = local_clock() - start;
		cidx = get_lat_count_idx(duration);

		duration = duration >> 10;
		this_cpu_inc(cgrp->sli_memlat_stat_percpu->item[sidx][cidx]);
		this_cpu_add(cgrp->sli_memlat_stat_percpu->latency_max[sidx], duration);

		if (static_branch_unlikely(&sli_monitor_enabled)) {
			struct sli_event_monitor *event_monitor = cgrp->cgrp_event_monitor;

			if (duration < READ_ONCE(event_monitor->memlat_threshold[sidx]))
				goto out;

			atomic_long_inc(&event_monitor->memlat_statistics[sidx]);

			if (event_monitor->mbuf_enable) {
				char *lat_name;

				lat_name = get_memlat_name(sidx);
				store_task_stack(current, lat_name, duration, 0);
			}
		}
	}

out:
	rcu_read_unlock();
}
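
/*
 * Account a scheduling latency sample of @delta nanoseconds for @task's
 * cgroup: bump the per-cpu histogram bucket, accumulate the total (after a
 * >> 10 shift, roughly microseconds) and, when proactive monitoring is on,
 * compare it against the per-event threshold and optionally log the stack
 * to mbuf.
 */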
void sli_schedlat_stat(struct task_struct *task, enum sli_schedlat_stat_item sidx, u64 delta)
{
	struct cgroup *cgrp = NULL;

	if (!static_branch_likely(&sli_enabled) || !task)
		return;

	rcu_read_lock();
	cgrp = get_cgroup_from_task(task);
	if (cgrp && cgroup_parent(cgrp)) {
		enum sli_lat_count cidx = get_lat_count_idx(delta);

		delta = delta >> 10;
		this_cpu_inc(cgrp->sli_schedlat_stat_percpu->item[sidx][cidx]);
		this_cpu_add(cgrp->sli_schedlat_stat_percpu->latency_max[sidx], delta);

		if (static_branch_unlikely(&sli_monitor_enabled)) {
			struct sli_event_monitor *event_monitor = cgrp->cgrp_event_monitor;

			if (delta < READ_ONCE(event_monitor->schedlat_threshold[sidx]))
				goto out;

			atomic_long_inc(&event_monitor->schedlat_statistics[sidx]);

			if (event_monitor->mbuf_enable) {
				char *lat_name;

				lat_name = get_schedlat_name(sidx);
				store_task_stack(task, lat_name, delta, 0);
			}
		}
	}

out:
	rcu_read_unlock();
}

void sli_schedlat_rundelay(struct task_struct *task, struct task_struct *prev, u64 delta)
{
	enum sli_schedlat_stat_item sidx = SCHEDLAT_RUNDELAY;
	struct cgroup *cgrp = NULL;

	if (!static_branch_likely(&sli_enabled) || !task || !prev)
		return;

	rcu_read_lock();
	cgrp = get_cgroup_from_task(task);
	if (cgrp && cgroup_parent(cgrp)) {
		enum sli_lat_count cidx = get_lat_count_idx(delta);

		delta = delta >> 10;
		this_cpu_inc(cgrp->sli_schedlat_stat_percpu->item[sidx][cidx]);
		this_cpu_add(cgrp->sli_schedlat_stat_percpu->latency_max[sidx], delta);

		if (static_branch_unlikely(&sli_monitor_enabled)) {
			struct sli_event_monitor *event_monitor = cgrp->cgrp_event_monitor;

			if (delta < READ_ONCE(event_monitor->schedlat_threshold[sidx]))
				goto out;

			atomic_long_inc(&event_monitor->schedlat_statistics[sidx]);

			if (event_monitor->mbuf_enable) {
				int i;
				unsigned long *entries;
				unsigned int nr_entries = 0;
				unsigned long flags;

				entries = kmalloc_array(MAX_STACK_TRACE_DEPTH, sizeof(*entries),
							GFP_ATOMIC);
				if (!entries)
					goto out;

				nr_entries = stack_trace_save_tsk(prev, entries,
								  MAX_STACK_TRACE_DEPTH, 0);

				spin_lock_irqsave(&cgrp->cgrp_mbuf_lock, flags);

				mbuf_print(cgrp, "record reason:schedlat_rundelay next_comm:%s "
					   "next_pid:%d prev_comm:%s prev_pid:%d duration=%lld\n",
					   task->comm, task->pid, prev->comm, prev->pid, delta);

				for (i = 0; i < nr_entries; i++)
					mbuf_print(cgrp, "[<0>] %pB\n", (void *)entries[i]);

				spin_unlock_irqrestore(&cgrp->cgrp_mbuf_lock, flags);
				kfree(entries);
			}
		}
	}

out:
	rcu_read_unlock();
}
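
/*
 * Detect CFS tasks that have spent a long stretch executing in the kernel
 * without being switched out (only checked once TIF_NEED_RESCHED is set and
 * never for kthreads).  The kernel-entry timestamp and the context-switch /
 * utime snapshots are refreshed whenever they are stale; otherwise the
 * elapsed rq clock time is accounted as a SCHEDLAT_LONGSYS latency.
 */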
void sli_check_longsys(struct task_struct *tsk)
{
	long delta;

	if (!static_branch_likely(&sli_enabled))
		return;

	if (tsk->sched_class != &fair_sched_class)
		return;

	/* Longsys accounting is performed only when TIF_NEED_RESCHED is set */
	if (!test_tsk_need_resched(tsk))
		return;

	/* Kthreads do not belong to any cgroup */
	if (tsk->flags & PF_KTHREAD)
		return;

	if (!tsk->sched_info.kernel_exec_start ||
	    tsk->sched_info.task_switch != (tsk->nvcsw + tsk->nivcsw) ||
	    tsk->utime != tsk->sched_info.utime) {
		tsk->sched_info.utime = tsk->utime;
		tsk->sched_info.kernel_exec_start = rq_clock(task_rq(tsk));
		tsk->sched_info.task_switch = tsk->nvcsw + tsk->nivcsw;
		return;
	}

	delta = rq_clock(task_rq(tsk)) - tsk->sched_info.kernel_exec_start;
	sli_schedlat_stat(tsk, SCHEDLAT_LONGSYS, delta);
}
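
/*
 * Workqueue handler for one monitoring period: walk the cgroup's event list,
 * read and reset the per-event statistics gathered since the last period,
 * record the events whose count reached the configured limit in a
 * sli_notify_event, and wake up userspace pollers via sli_monitor_signal().
 * A period flagged as overrun is discarded instead of being reported.
 */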
static void sli_proactive_monitor_work(struct work_struct *work)
{
	struct sli_event *event;
	struct sli_notify_event *notify_event;
	struct sli_event_monitor *event_monitor = container_of(work, struct sli_event_monitor,
								sli_event_work);

	notify_event = kzalloc(sizeof(struct sli_notify_event), GFP_KERNEL);
	if (!notify_event)
		return;

	rcu_read_lock();
	list_for_each_entry_rcu(event, &event_monitor->event_head, event_node) {
		u64 statistics, last_statistics, threshold;

		switch (event->event_type) {
		case SLI_SCHED_EVENT:
			statistics = (u64)atomic_long_read(
					&event_monitor->schedlat_statistics[event->event_id]);
			atomic_long_set(&event_monitor->schedlat_statistics[event->event_id], 0);

			if (event_monitor->overrun) {
				event_monitor->overrun = 0;
				break;
			}

			if (statistics >= READ_ONCE(event_monitor->schedlat_count[event->event_id]))
				sli_event_add(notify_event, event->event_type,
					      event->event_id, statistics);
			break;
		case SLI_MEM_EVENT:
			statistics = (u64)atomic_long_read(
					&event_monitor->memlat_statistics[event->event_id]);
			atomic_long_set(&event_monitor->memlat_statistics[event->event_id], 0);

			if (event_monitor->overrun) {
				event_monitor->overrun = 0;
				break;
			}

			if (statistics >= READ_ONCE(event_monitor->memlat_count[event->event_id]))
				sli_event_add(notify_event, event->event_type,
					      event->event_id, statistics);
			break;
		case SLI_LONGTERM_EVENT:
			statistics = sli_get_longterm_statistics(event_monitor->cgrp,
								 event->event_id);

			last_statistics = atomic_long_read(
					&event_monitor->longterm_statistics[event->event_id]);
			atomic_long_set(&event_monitor->longterm_statistics[event->event_id],
					statistics);

			if (event_monitor->overrun) {
				event_monitor->overrun = 0;
				break;
			}

			threshold = READ_ONCE(event_monitor->longterm_threshold[event->event_id]);
			/* Deal with wrap-around of the time counter correctly */
			if ((long)(statistics - last_statistics - threshold) >= 0)
				sli_event_add(notify_event, event->event_type,
					      event->event_id, (int)(statistics - last_statistics));
			break;
		default:
			break;
		}
	}
	rcu_read_unlock();

	/* Notify userspace that monitoring events have arrived */
	sli_monitor_signal(event_monitor->cgrp, notify_event);

	kfree(notify_event);
	css_put(&event_monitor->cgrp->self);
}
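
/*
 * Tick hook for proactive monitoring: once per configured period it
 * advances last_update with cmpxchg() (so only one CPU wins the race),
 * marks the sample as overrun when more than eight periods have elapsed,
 * and queues the cgroup's monitor work on sli_workqueue while holding a
 * css reference.
 */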
void sli_update_tick(struct task_struct *tsk)
{
	struct cgroup *cgrp;

	if (!static_branch_likely(&sli_monitor_enabled))
		return;

	rcu_read_lock();

	cgrp = get_cgroup_from_task(tsk);
	if (cgrp && cgroup_parent(cgrp)) {
		bool ret;
		int period;
		unsigned long long old_value, last_update;

		period = cgrp->cgrp_event_monitor->period;
		if (!period)
			goto unlock;

retry:
		last_update = READ_ONCE(cgrp->cgrp_event_monitor->last_update);
		if (time_after((unsigned long)(period + last_update), jiffies))
			goto unlock;

		old_value = cmpxchg(&cgrp->cgrp_event_monitor->last_update,
				    last_update, jiffies);
		if (old_value != last_update)
			goto retry;

		/*
		 * The current jiffies should lie between period and 8 * period
		 * after last_update; otherwise the sample is considered overrun
		 * and is abandoned.
		 */
		if (time_before((unsigned long)((period << 3) + last_update), jiffies))
			cgrp->cgrp_event_monitor->overrun = 1;

		rcu_read_unlock();

		ret = css_tryget(&cgrp->self);
		if (!ret)
			return;

		/*
		 * The sli monitor work may have a lot of work to do and has to
		 * send events to polling tasks, so it is not done in interrupt
		 * context but deferred to the workqueue.
		 */
		ret = queue_work(sli_workqueue, &cgrp->cgrp_event_monitor->sli_event_work);
		/*
		 * If the work is already queued and has not been executed yet,
		 * there is no need to queue it again, so the css refcount must
		 * be dropped here.
		 */
		if (!ret)
			css_put(&cgrp->self);
		return;
	}

unlock:
	rcu_read_unlock();
}

static u64 sli_schedlat_stat_gather(struct cgroup *cgrp,
				    enum sli_schedlat_stat_item sidx,
				    enum sli_lat_count cidx)
{
	u64 sum = 0;
	int cpu;

	for_each_possible_cpu(cpu)
		sum += per_cpu_ptr(cgrp->sli_schedlat_stat_percpu, cpu)->item[sidx][cidx];

	return sum;
}

int sli_schedlat_max_show(struct seq_file *m, struct cgroup *cgrp)
{
	enum sli_schedlat_stat_item sidx;

	if (!static_branch_likely(&sli_enabled)) {
		seq_printf(m, "sli is not enabled, please echo 1 > /proc/sli/sli_enabled\n");
		return 0;
	}

	if (!cgrp->sli_schedlat_stat_percpu)
		return 0;

	for (sidx = SCHEDLAT_WAIT; sidx < SCHEDLAT_STAT_NR; sidx++) {
		int cpu;
		unsigned long latency_sum = 0;

		for_each_possible_cpu(cpu)
			latency_sum += per_cpu_ptr(cgrp->sli_schedlat_stat_percpu, cpu)->latency_max[sidx];

		seq_printf(m, "%s: %lu\n", get_schedlat_name(sidx), latency_sum);
	}

	return 0;
}

int sli_schedlat_stat_show(struct seq_file *m, struct cgroup *cgrp)
{
	enum sli_schedlat_stat_item sidx;

	if (!static_branch_likely(&sli_enabled)) {
		seq_printf(m, "sli is not enabled, please echo 1 > /proc/sli/sli_enabled\n");
		return 0;
	}

	if (!cgrp->sli_schedlat_stat_percpu)
		return 0;

	for (sidx = SCHEDLAT_WAIT; sidx < SCHEDLAT_STAT_NR; sidx++) {
		seq_printf(m, "%s:\n", get_schedlat_name(sidx));
		seq_printf(m, "0-1ms: %llu\n", sli_schedlat_stat_gather(cgrp, sidx, LAT_0_1));
		seq_printf(m, "1-4ms: %llu\n", sli_schedlat_stat_gather(cgrp, sidx, LAT_1_4));
		seq_printf(m, "4-8ms: %llu\n", sli_schedlat_stat_gather(cgrp, sidx, LAT_4_8));
		seq_printf(m, "8-16ms: %llu\n", sli_schedlat_stat_gather(cgrp, sidx, LAT_8_16));
		seq_printf(m, "16-32ms: %llu\n", sli_schedlat_stat_gather(cgrp, sidx, LAT_16_32));
		seq_printf(m, "32-64ms: %llu\n", sli_schedlat_stat_gather(cgrp, sidx, LAT_32_64));
		seq_printf(m, "64-128ms: %llu\n", sli_schedlat_stat_gather(cgrp, sidx, LAT_64_128));
		seq_printf(m, ">=128ms: %llu\n", sli_schedlat_stat_gather(cgrp, sidx, LAT_128_INF));
	}

	return 0;
}

static unsigned long sli_get_longterm_statistics(struct cgroup *cgrp,
						 enum sli_longterm_event event_id)
{
	int cpu, index;
	unsigned long latency_sum = 0;

	index = SCHEDLAT_RUNDELAY + event_id;
	for_each_possible_cpu(cpu)
		latency_sum += READ_ONCE(per_cpu_ptr(cgrp->sli_schedlat_stat_percpu,
						     cpu)->latency_max[index]);

	return latency_sum;
}
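
/*
 * Parse the value part of a threshold setting.  The expected form is
 * "<threshold><,| >count=<count>", e.g. "10,count=5"; a written value of 0
 * is stored internally as ULLONG_MAX (disabled) via sli_convert_value().
 */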
static inline int sli_parse_threshold(char *buf, struct sli_event_control *sec)
{
	char *str;
	int i, len, ret;
	u64 value;

	/* Replace the delimiter with '\0' */
	len = strlen(buf);
	for (i = 0; i < len; i++) {
		if (buf[i] == ',' || buf[i] == ' ') {
			buf[i] = '\0';
			break;
		}
	}

	if (i == len)
		return -EINVAL;

	/* Parse the threshold value */
	ret = kstrtou64(buf, 0, &value);
	if (ret)
		return ret;

	sec->threshold = sli_convert_value(value, false);

	/* Move the pointer to the position after the delimiter */
	buf += (i + 1);
	len -= (i + 1);

	/* Parse the count value (if it exists) */
	str = strnstr(buf, "count=", len);
	if (!str)
		return -EINVAL;

	str += strlen("count=");
	ret = kstrtou64(str, 0, &value);
	if (ret)
		return ret;

	sec->count = sli_convert_value(value, false);

	return 0;
}

static int sli_parse_parameter(char *buf, int len, struct sli_event_control *sec, int index)
{
	int i, min_len, ret = 0;
	u64 value;

	switch (index) {
	case 0:
		for (i = 0; i < ARRAY_SIZE(schedlat_theshold_name); i++) {
			min_len = min(len, (int)strlen(schedlat_theshold_name[i]));
			if (!strncmp(schedlat_theshold_name[i], buf, min_len))
				break;
		}

		if (i == ARRAY_SIZE(schedlat_theshold_name))
			return -EINVAL;

		buf += min_len;
		ret = sli_parse_threshold(buf, sec);
		if (ret)
			return ret;

		sec->event_type = SLI_SCHED_EVENT;
		sec->event_id = i;
		break;
	case 1:
		for (i = 0; i < ARRAY_SIZE(memlat_threshold_name); i++) {
			min_len = min(len, (int)strlen((const char *)memlat_threshold_name[i]));
			if (!strncmp(memlat_threshold_name[i], buf, min_len))
				break;
		}

		if (i == ARRAY_SIZE(memlat_threshold_name))
			return -EINVAL;

		buf += min_len;
		ret = sli_parse_threshold(buf, sec);
		if (ret)
			return ret;

		sec->event_type = SLI_MEM_EVENT;
		sec->event_id = i;
		break;
	case 2:
		for (i = 0; i < ARRAY_SIZE(longterm_threshold_name); i++) {
			min_len = min(len, (int)strlen((const char *)longterm_threshold_name[i]));
			if (!strncmp(longterm_threshold_name[i], buf, min_len))
				break;
		}

		if (i == ARRAY_SIZE(longterm_threshold_name))
			return -EINVAL;

		buf += min_len;
		ret = sli_parse_threshold(buf, sec);
		if (ret)
			return ret;

		sec->event_type = SLI_LONGTERM_EVENT;
		sec->event_id = i;
		break;
	case 3:
		buf += strlen("period=");
		ret = kstrtou64(buf, 0, &value);
		if (ret)
			return ret;

		sec->period = usecs_to_jiffies(value);
		break;
	case 4:
		buf += strlen("mbuf_enable=");
		ret = kstrtou64(buf, 0, &value);
		if (ret)
			return ret;

		sec->mbuf_enable = !!value;
		break;

	default:
		return -EINVAL;
	}

	return 0;
}
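
/*
 * Validate one setting written to the control file: strip whitespace, match
 * the prefix against sanity_check_abbr[] ("schedlat_", "memlat_",
 * "longterm_", "period=", "mbuf_enable=") and hand the rest over to
 * sli_parse_parameter() with the matching index.
 */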
static int sli_sanity_check(char *buf, struct sli_event_control *sec)
{
	int i, len, min_len;

	buf = strstrip(buf);
	if (!buf)
		return -EINVAL;

	len = strlen(buf);
	for (i = 0; i < ARRAY_SIZE(sanity_check_abbr); i++) {
		min_len = min(len, (int)strlen(sanity_check_abbr[i]));

		if (!strncmp(sanity_check_abbr[i], buf, min_len))
			break;
	}

	/* The input string does not match any entry in the list */
	if (i == ARRAY_SIZE(sanity_check_abbr))
		return -EINVAL;

	return sli_parse_parameter(buf, len, sec, i);
}

static int sli_event_update(struct sli_event_monitor *event_monitor,
			    struct sli_event_control *sec, u64 last_threshold)
{
	struct sli_event *event;

	/* Add the sli event */
	if (last_threshold == ULLONG_MAX && sec->threshold != ULLONG_MAX) {
		event = kmalloc(sizeof(struct sli_event), GFP_KERNEL);
		if (!event)
			return -ENOMEM;

		event->event_type = sec->event_type;
		event->event_id = sec->event_id;
		/* event_type and event_id must be written before the entry is added to the list */
		smp_wmb();
		list_add_rcu(&event->event_node, &event_monitor->event_head);
	} else if (last_threshold != ULLONG_MAX && sec->threshold == ULLONG_MAX) {
		/* Remove the sli event */
		list_for_each_entry(event, &event_monitor->event_head, event_node) {
			if (event->event_type != sec->event_type)
				continue;

			if (event->event_id != sec->event_id)
				continue;

			list_del_rcu(&event->event_node);
			kfree_rcu(event, rcu);
			break;
		}
	}

	return 0;
}
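
/*
 * Handler for writes to the per-cgroup sli control file.  Each write carries
 * one setting, e.g. "period=1000", "mbuf_enable=1" or
 * "schedlat_wait_threshold=10,count=5"; writing a threshold of 0 removes the
 * corresponding monitoring event.  The root cgroup writes into the global
 * default_sli_event_monitor, and toggling period/mbuf_enable on the root
 * flips the sli_monitor_enabled static key.
 */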
ssize_t cgroup_sli_control_write(struct kernfs_open_file *of, char *buf,
				 size_t nbytes, loff_t off)
{
	int ret;
	struct cgroup *cgrp;
	struct sli_event_monitor *event_monitor;
	struct sli_event_control sec = {.event_type = -1, .period = -1, .mbuf_enable = -1,};

	cgrp = of_css(of)->cgroup;
	if (cgroup_parent(cgrp))
		event_monitor = cgrp->cgrp_event_monitor;
	else
		event_monitor = &default_sli_event_monitor;

	inode_lock(file_inode(of->file));
	ret = sli_sanity_check(buf, &sec);
	if (ret)
		goto out;

	if (sec.period != -1) {
		if (!!event_monitor->period == !!sec.period) {
			WRITE_ONCE(event_monitor->period, sec.period);
			goto out;
		}

		WRITE_ONCE(event_monitor->period, sec.period);

		if (cgroup_parent(cgrp) || event_monitor->mbuf_enable)
			goto out;

		if (sec.period)
			static_branch_enable(&sli_monitor_enabled);
		else
			static_branch_disable(&sli_monitor_enabled);

		goto out;
	}

	if (sec.mbuf_enable != -1) {
		if (sec.mbuf_enable == event_monitor->mbuf_enable)
			goto out;

		WRITE_ONCE(event_monitor->mbuf_enable, sec.mbuf_enable);

		if (cgroup_parent(cgrp) || event_monitor->period)
			goto out;

		if (sec.mbuf_enable)
			static_branch_enable(&sli_monitor_enabled);
		else
			static_branch_disable(&sli_monitor_enabled);

		goto out;
	}

	if (sec.event_type != -1) {
		unsigned long long last_threshold;

		switch (sec.event_type) {
		case SLI_SCHED_EVENT:
			last_threshold = event_monitor->schedlat_threshold[sec.event_id];
			WRITE_ONCE(event_monitor->schedlat_threshold[sec.event_id], sec.threshold);
			WRITE_ONCE(event_monitor->schedlat_count[sec.event_id], sec.count);
			smp_wmb();
			atomic_long_set(&event_monitor->schedlat_statistics[sec.event_id], 0);
			ret = sli_event_update(event_monitor, &sec, last_threshold);
			break;
		case SLI_MEM_EVENT:
			last_threshold = event_monitor->memlat_threshold[sec.event_id];
			WRITE_ONCE(event_monitor->memlat_threshold[sec.event_id], sec.threshold);
			WRITE_ONCE(event_monitor->memlat_count[sec.event_id], sec.count);
			smp_wmb();
			atomic_long_set(&event_monitor->memlat_statistics[sec.event_id], 0);
			ret = sli_event_update(event_monitor, &sec, last_threshold);
			break;
		case SLI_LONGTERM_EVENT:
			last_threshold = event_monitor->longterm_threshold[sec.event_id];
			WRITE_ONCE(event_monitor->longterm_threshold[sec.event_id], sec.threshold);
			smp_wmb();
			if (cgroup_parent(cgrp))
				atomic_long_set(&event_monitor->longterm_statistics[sec.event_id],
						sli_get_longterm_statistics(cgrp, sec.event_id));
			ret = sli_event_update(event_monitor, &sec, last_threshold);
			break;
		default:
			break;
		}
	}

out:
	if (!ret)
		ret = nbytes;
	inode_unlock(file_inode(of->file));
	return ret;
}
int cgroup_sli_control_show(struct seq_file *sf, void *v)
{
	int i;
	unsigned long long threshold, count;
	struct cgroup *cgrp;
	struct sli_event_monitor *event_monitor;

	cgrp = seq_css(sf)->cgroup;
	if (cgroup_parent(cgrp))
		event_monitor = cgrp->cgrp_event_monitor;
	else
		event_monitor = &default_sli_event_monitor;

	seq_printf(sf, "period: %d\n", event_monitor->period);
	seq_printf(sf, "mbuf_enable: %d\n", event_monitor->mbuf_enable);

	for (i = 0; i < SCHEDLAT_STAT_NR; i++) {
		threshold = sli_convert_value(event_monitor->schedlat_threshold[i], true);
		count = sli_convert_value(event_monitor->schedlat_count[i], true);

		seq_printf(sf, "%s: threshold: %llu, count: %llu\n", get_schedlat_name(i),
			   threshold, count);
	}

	for (i = 0; i < MEM_LAT_STAT_NR; i++) {
		threshold = sli_convert_value(event_monitor->memlat_threshold[i], true);
		count = sli_convert_value(event_monitor->memlat_count[i], true);

		seq_printf(sf, "%s: threshold: %llu, count: %llu\n", get_memlat_name(i),
			   threshold, count);
	}

	for (i = 0; i < SLI_LONGTERM_NR; i++) {
		threshold = sli_convert_value(event_monitor->longterm_threshold[i], true);

		seq_printf(sf, "%s: threshold: %llu\n", get_longterm_name(i), threshold);
	}

	return 0;
}

/* sli monitor functions */
struct sli_notify_ctx *sctx_alloc(void)
{
	struct sli_notify_ctx *sctx;

	sctx = kzalloc(sizeof(struct sli_notify_ctx), GFP_KERNEL);
	if (sctx) {
		/* Do init work */
		init_waitqueue_head(&sctx->wqh);
		spin_lock_init(&sctx->notify_lock);
	}

	return sctx;
}

void sctx_free(struct cgroup *cgrp)
{
	if (cgrp->sctx) {
		kfree(cgrp->sctx);
		cgrp->sctx = NULL;
	}
}

static int sli_monitor_exchange(struct sli_notify_event *tnotify_event,
				struct sli_notify_event *snotify_event)
{
	memcpy(tnotify_event->notify_vector, snotify_event->notify_vector,
	       sizeof(struct sli_notify_event));
	memset(snotify_event->notify_vector, 0, sizeof(struct sli_notify_event));

	return 0;
}

int sli_monitor_open(struct kernfs_open_file *of)
{
	struct file *filp = of->file;
	int ret = 0;

	filp->f_mode &= FMODE_READ;

	if (!(filp->f_mode & FMODE_READ))
		ret = -EINVAL;

	return ret;
}
static inline void notify_event_print(struct seq_file *seq,
				      struct sli_notify_event *notify_event,
				      u32 event_type, u32 levent_max)
{
	int index;
	u32 count;

	for (index = 0; index < levent_max; index++) {
		count = notify_event->notify_vector[event_type][index];
		/*
		 * Only print when the event count > 0, print format:
		 * event_type event_item event_count
		 */
		if (count > 0)
			seq_printf(seq, "%u %u %u\n", event_type, index, count);
	}
}

int sli_monitor_show(struct seq_file *seq, void *v)
{
	struct cgroup *cgrp = seq_css(seq)->cgroup;
	struct sli_notify_event notify_event;
	unsigned long flags;
	int i = 0;

	if (cgrp && cgrp->sctx) {
		spin_lock_irqsave(&cgrp->sctx->notify_lock, flags);
		sli_monitor_exchange(&notify_event, &cgrp->sctx->notify_event);
		spin_unlock_irqrestore(&cgrp->sctx->notify_lock, flags);

		for (i = 0; i < SLI_EVENT_NR; i++)
			notify_event_print(seq, &notify_event, i, SLI_ITEM_MAX);
	}

	return 0;
}

void *sli_monitor_start(struct seq_file *s, loff_t *pos)
{
	return NULL + !*pos;
}

/* A seq_next function is necessary for seq_read */
void *sli_monitor_next(struct seq_file *s, void *v, loff_t *pos)
{
	return NULL;
}

void sli_monitor_stop(struct seq_file *seq, void *v)
{
	/* Must reset the index so the next read can begin from 0 */
	if (!seq->count)
		seq->index = 0;
}

static inline bool is_notify_active(struct sli_notify_event *ne)
{
	int index_e;
	int index_i;

	/* Any sli event count > 0 makes the context active */
	for (index_e = 0; index_e < SLI_EVENT_NR; index_e++) {
		for (index_i = 0; index_i < SLI_ITEM_MAX; index_i++)
			if (ne->notify_vector[index_e][index_i] > 0)
				return true;
	}

	return false;
}
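
/*
 * poll() handler for the per-cgroup sli monitor file: report EPOLLIN when
 * any pending notify count is non-zero, so userspace knows new monitoring
 * results can be read.
 */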
__poll_t sli_monitor_poll(struct kernfs_open_file *of,
			  poll_table *pt)
{
	struct cgroup *cgrp = of->kn->parent->priv;
	struct file *filp = of->file;
	struct sli_notify_ctx *sctx;
	__poll_t events = 0;
	bool active;
	unsigned long flags;

	sctx = cgrp->sctx;
	if (!sctx) {
		pr_err("sli: can not find sctx for cgroup [ %s ]", of->kn->name);
		return -EINVAL;
	}

	poll_wait(filp, &sctx->wqh, pt);

	/* Must hold the notify_event lock */
	spin_lock_irqsave(&cgrp->sctx->notify_lock, flags);
	active = is_notify_active(&sctx->notify_event);
	spin_unlock_irqrestore(&cgrp->sctx->notify_lock, flags);

	if (active)
		events |= EPOLLIN;

	return events;
}

int sli_event_add(struct sli_notify_event *notify_event,
		  u32 event_type, u32 levent, u32 count)
{
	int res = 0;

	if (!notify_event) {
		pr_err("sli: target notify_event is NULL\n");
		res = -1;
		goto end;
	}

	if (event_type >= SLI_EVENT_NR || levent >= SLI_ITEM_MAX) {
		pr_err("sli: invalid sli event type [ %u ] or sli item [ %u ]\n",
		       event_type, levent);
		res = -1;
		goto end;
	}
	notify_event->notify_vector[event_type][levent] = count;

end:
	return res;
}
EXPORT_SYMBOL(sli_event_add);

u32 sli_monitor_signal(struct cgroup *cgrp, struct sli_notify_event *notify_event)
{
	unsigned long flags;
	struct sli_notify_ctx *sctx;

	if (!cgrp->sctx) {
		pr_err("sli: can not find notify info for cgroup:[ %s ]\n", cgrp->kn->name);
		return 0;
	}

	sctx = cgrp->sctx;

	spin_lock_irqsave(&sctx->notify_lock, flags);
	sli_monitor_exchange(&cgrp->sctx->notify_event, notify_event);
	spin_unlock_irqrestore(&sctx->notify_lock, flags);

	if (waitqueue_active(&sctx->wqh))
		wake_up_poll(&sctx->wqh, EPOLLIN);

	return 0;
}
EXPORT_SYMBOL_GPL(sli_monitor_signal);

static int sli_enabled_show(struct seq_file *m, void *v)
{
	seq_printf(m, "%d\n", static_key_enabled(&sli_enabled));
	return 0;
}

static int sli_enabled_open(struct inode *inode, struct file *file)
{
	return single_open(file, sli_enabled_show, NULL);
}

static ssize_t sli_enabled_write(struct file *file, const char __user *ubuf,
				 size_t count, loff_t *ppos)
{
	char val = -1;
	int ret = count;

	if (count < 1 || *ppos) {
		ret = -EINVAL;
		goto out;
	}

	if (copy_from_user(&val, ubuf, 1)) {
		ret = -EFAULT;
		goto out;
	}

	switch (val) {
	case '0':
		if (static_key_enabled(&sli_enabled))
			static_branch_disable(&sli_enabled);
		break;
	case '1':
		if (!static_key_enabled(&sli_enabled))
			static_branch_enable(&sli_enabled);
		break;
	default:
		ret = -EINVAL;
	}

out:
	return ret;
}

static const struct file_operations sli_enabled_fops = {
	.open		= sli_enabled_open,
	.read		= seq_read,
	.write		= sli_enabled_write,
	.llseek		= seq_lseek,
	.release	= single_release,
};
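
/*
 * Allocate the per-cgroup SLI state (per-cpu latency histograms and the
 * event monitor) and inherit the monitoring configuration from the parent
 * cgroup; everything is rolled back on failure.
 */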
int sli_cgroup_alloc(struct cgroup *cgroup)
{
	if (!cgroup_need_sli(cgroup))
		return 0;

	spin_lock_init(&cgroup->cgrp_mbuf_lock);
	cgroup->sli_memlat_stat_percpu = alloc_percpu(struct sli_memlat_stat);
	if (!cgroup->sli_memlat_stat_percpu)
		goto out;

	cgroup->sli_schedlat_stat_percpu = alloc_percpu(struct sli_schedlat_stat);
	if (!cgroup->sli_schedlat_stat_percpu)
		goto free_memlat_percpu;

	cgroup->cgrp_event_monitor = kzalloc(sizeof(struct sli_event_monitor), GFP_KERNEL);
	if (!cgroup->cgrp_event_monitor)
		goto free_schedlat_percpu;

	sli_event_monitor_init(cgroup->cgrp_event_monitor, cgroup);
	if (sli_event_inherit(cgroup))
		goto free_cgrp_event;

	return 0;

free_cgrp_event:
	kfree(cgroup->cgrp_event_monitor);
free_schedlat_percpu:
	free_percpu(cgroup->sli_schedlat_stat_percpu);
free_memlat_percpu:
	free_percpu(cgroup->sli_memlat_stat_percpu);
out:
	return -ENOMEM;
}

void sli_cgroup_free(struct cgroup *cgroup)
{
	struct sli_event *event, *event_tmp;

	/*
	 * The cgroup's subsystems are already cleared by the time
	 * sli_cgroup_free() is called, so check cgroup->cgrp_event_monitor
	 * instead of cgroup_need_sli() to decide whether this cgroup's SLI
	 * memory has to be freed here.
	 */
	if (!cgroup->cgrp_event_monitor)
		return;

	free_percpu(cgroup->sli_memlat_stat_percpu);
	free_percpu(cgroup->sli_schedlat_stat_percpu);
	/* Free memory from the event list */
	list_for_each_entry_safe(event, event_tmp,
				 &cgroup->cgrp_event_monitor->event_head, event_node) {
		list_del(&event->event_node);
		kfree(event);
	}
	kfree(cgroup->cgrp_event_monitor);
}

static int __init sli_proc_init(void)
{
	sli_event_monitor_init(&default_sli_event_monitor, NULL);
	sli_workqueue = alloc_workqueue("events_unbound", WQ_UNBOUND, WQ_UNBOUND_MAX_ACTIVE);
	if (!sli_workqueue) {
		printk(KERN_ERR "Create sli workqueue failed!\n");
		return -1;
	}
	proc_mkdir("sli", NULL);
	proc_create("sli/sli_enabled", 0, NULL, &sli_enabled_fops);
	return 0;
}

late_initcall(sli_proc_init);