Merge branch 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
* 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: (25 commits) sched: Fix SCHED_MC regression caused by change in sched cpu_power sched: Don't use possibly stale sched_class kthread, sched: Remove reference to kthread_create_on_cpu sched: cpuacct: Use bigger percpu counter batch values for stats counters percpu_counter: Make __percpu_counter_add an inline function on UP sched: Remove member rt_se from struct rt_rq sched: Change usage of rt_rq->rt_se to rt_rq->tg->rt_se[cpu] sched: Remove unused update_shares_locked() sched: Use for_each_bit sched: Queue a deboosted task to the head of the RT prio queue sched: Implement head queueing for sched_rt sched: Extend enqueue_task to allow head queueing sched: Remove USER_SCHED sched: Fix the place where group powers are updated sched: Assume *balance is valid sched: Remove load_balance_newidle() sched: Unify load_balance{,_newidle}() sched: Add a lock break for PREEMPT=y sched: Remove from fwd decls sched: Remove rq_iterator from move_one_task ... Fix up trivial conflicts in kernel/sched.c
This commit is contained in:
commit
f66ffdedbf
|
@ -6,21 +6,6 @@ be removed from this file.
|
|||
|
||||
---------------------------
|
||||
|
||||
What: USER_SCHED
|
||||
When: 2.6.34
|
||||
|
||||
Why: USER_SCHED was implemented as a proof of concept for group scheduling.
|
||||
The effect of USER_SCHED can already be achieved from userspace with
|
||||
the help of libcgroup. The removal of USER_SCHED will also simplify
|
||||
the scheduler code with the removal of one major ifdef. There are also
|
||||
issues USER_SCHED has with USER_NS. A decision was taken not to fix
|
||||
those and instead remove USER_SCHED. Also new group scheduling
|
||||
features will not be implemented for USER_SCHED.
|
||||
|
||||
Who: Dhaval Giani <dhaval@linux.vnet.ibm.com>
|
||||
|
||||
---------------------------
|
||||
|
||||
What: PRISM54
|
||||
When: 2.6.34
|
||||
|
||||
|
|
|
@ -124,7 +124,7 @@ extern int _cond_resched(void);
|
|||
#endif
|
||||
|
||||
#ifdef CONFIG_DEBUG_SPINLOCK_SLEEP
|
||||
void __might_sleep(char *file, int line, int preempt_offset);
|
||||
void __might_sleep(const char *file, int line, int preempt_offset);
|
||||
/**
|
||||
* might_sleep - annotation for functions that can sleep
|
||||
*
|
||||
|
@ -138,7 +138,8 @@ extern int _cond_resched(void);
|
|||
# define might_sleep() \
|
||||
do { __might_sleep(__FILE__, __LINE__, 0); might_resched(); } while (0)
|
||||
#else
|
||||
static inline void __might_sleep(char *file, int line, int preempt_offset) { }
|
||||
static inline void __might_sleep(const char *file, int line,
|
||||
int preempt_offset) { }
|
||||
# define might_sleep() do { might_resched(); } while (0)
|
||||
#endif
|
||||
|
||||
|
|
|
@ -98,9 +98,6 @@ static inline void percpu_counter_set(struct percpu_counter *fbc, s64 amount)
|
|||
fbc->count = amount;
|
||||
}
|
||||
|
||||
#define __percpu_counter_add(fbc, amount, batch) \
|
||||
percpu_counter_add(fbc, amount)
|
||||
|
||||
static inline void
|
||||
percpu_counter_add(struct percpu_counter *fbc, s64 amount)
|
||||
{
|
||||
|
@ -109,6 +106,12 @@ percpu_counter_add(struct percpu_counter *fbc, s64 amount)
|
|||
preempt_enable();
|
||||
}
|
||||
|
||||
static inline void
|
||||
__percpu_counter_add(struct percpu_counter *fbc, s64 amount, s32 batch)
|
||||
{
|
||||
percpu_counter_add(fbc, amount);
|
||||
}
|
||||
|
||||
static inline s64 percpu_counter_read(struct percpu_counter *fbc)
|
||||
{
|
||||
return fbc->count;
|
||||
|
|
|
@ -740,14 +740,6 @@ struct user_struct {
|
|||
uid_t uid;
|
||||
struct user_namespace *user_ns;
|
||||
|
||||
#ifdef CONFIG_USER_SCHED
|
||||
struct task_group *tg;
|
||||
#ifdef CONFIG_SYSFS
|
||||
struct kobject kobj;
|
||||
struct delayed_work work;
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_PERF_EVENTS
|
||||
atomic_long_t locked_vm;
|
||||
#endif
|
||||
|
@ -1087,7 +1079,8 @@ struct sched_domain;
|
|||
struct sched_class {
|
||||
const struct sched_class *next;
|
||||
|
||||
void (*enqueue_task) (struct rq *rq, struct task_struct *p, int wakeup);
|
||||
void (*enqueue_task) (struct rq *rq, struct task_struct *p, int wakeup,
|
||||
bool head);
|
||||
void (*dequeue_task) (struct rq *rq, struct task_struct *p, int sleep);
|
||||
void (*yield_task) (struct rq *rq);
|
||||
|
||||
|
@ -1099,14 +1092,6 @@ struct sched_class {
|
|||
#ifdef CONFIG_SMP
|
||||
int (*select_task_rq)(struct task_struct *p, int sd_flag, int flags);
|
||||
|
||||
unsigned long (*load_balance) (struct rq *this_rq, int this_cpu,
|
||||
struct rq *busiest, unsigned long max_load_move,
|
||||
struct sched_domain *sd, enum cpu_idle_type idle,
|
||||
int *all_pinned, int *this_best_prio);
|
||||
|
||||
int (*move_one_task) (struct rq *this_rq, int this_cpu,
|
||||
struct rq *busiest, struct sched_domain *sd,
|
||||
enum cpu_idle_type idle);
|
||||
void (*pre_schedule) (struct rq *this_rq, struct task_struct *task);
|
||||
void (*post_schedule) (struct rq *this_rq);
|
||||
void (*task_waking) (struct rq *this_rq, struct task_struct *task);
|
||||
|
@ -2520,13 +2505,9 @@ extern long sched_getaffinity(pid_t pid, struct cpumask *mask);
|
|||
|
||||
extern void normalize_rt_tasks(void);
|
||||
|
||||
#ifdef CONFIG_GROUP_SCHED
|
||||
#ifdef CONFIG_CGROUP_SCHED
|
||||
|
||||
extern struct task_group init_task_group;
|
||||
#ifdef CONFIG_USER_SCHED
|
||||
extern struct task_group root_task_group;
|
||||
extern void set_tg_uid(struct user_struct *user);
|
||||
#endif
|
||||
|
||||
extern struct task_group *sched_create_group(struct task_group *parent);
|
||||
extern void sched_destroy_group(struct task_group *tg);
|
||||
|
|
81
init/Kconfig
81
init/Kconfig
|
@ -461,57 +461,6 @@ config LOG_BUF_SHIFT
|
|||
config HAVE_UNSTABLE_SCHED_CLOCK
|
||||
bool
|
||||
|
||||
config GROUP_SCHED
|
||||
bool "Group CPU scheduler"
|
||||
depends on EXPERIMENTAL
|
||||
default n
|
||||
help
|
||||
This feature lets CPU scheduler recognize task groups and control CPU
|
||||
bandwidth allocation to such task groups.
|
||||
In order to create a group from arbitrary set of processes, use
|
||||
CONFIG_CGROUPS. (See Control Group support.)
|
||||
|
||||
config FAIR_GROUP_SCHED
|
||||
bool "Group scheduling for SCHED_OTHER"
|
||||
depends on GROUP_SCHED
|
||||
default GROUP_SCHED
|
||||
|
||||
config RT_GROUP_SCHED
|
||||
bool "Group scheduling for SCHED_RR/FIFO"
|
||||
depends on EXPERIMENTAL
|
||||
depends on GROUP_SCHED
|
||||
default n
|
||||
help
|
||||
This feature lets you explicitly allocate real CPU bandwidth
|
||||
to users or control groups (depending on the "Basis for grouping tasks"
|
||||
setting below). If enabled, it will also make it impossible to
|
||||
schedule realtime tasks for non-root users until you allocate
|
||||
realtime bandwidth for them.
|
||||
See Documentation/scheduler/sched-rt-group.txt for more information.
|
||||
|
||||
choice
|
||||
depends on GROUP_SCHED
|
||||
prompt "Basis for grouping tasks"
|
||||
default USER_SCHED
|
||||
|
||||
config USER_SCHED
|
||||
bool "user id"
|
||||
help
|
||||
This option will choose userid as the basis for grouping
|
||||
tasks, thus providing equal CPU bandwidth to each user.
|
||||
|
||||
config CGROUP_SCHED
|
||||
bool "Control groups"
|
||||
depends on CGROUPS
|
||||
help
|
||||
This option allows you to create arbitrary task groups
|
||||
using the "cgroup" pseudo filesystem and control
|
||||
the cpu bandwidth allocated to each such task group.
|
||||
Refer to Documentation/cgroups/cgroups.txt for more
|
||||
information on "cgroup" pseudo filesystem.
|
||||
|
||||
endchoice
|
||||
|
||||
menuconfig CGROUPS
|
||||
boolean "Control Group support"
|
||||
help
|
||||
|
@ -632,6 +581,36 @@ config CGROUP_MEM_RES_CTLR_SWAP
|
|||
Now, memory usage of swap_cgroup is 2 bytes per entry. If swap page
|
||||
size is 4096bytes, 512k per 1Gbytes of swap.
|
||||
|
||||
menuconfig CGROUP_SCHED
|
||||
bool "Group CPU scheduler"
|
||||
depends on EXPERIMENTAL && CGROUPS
|
||||
default n
|
||||
help
|
||||
This feature lets CPU scheduler recognize task groups and control CPU
|
||||
bandwidth allocation to such task groups. It uses cgroups to group
|
||||
tasks.
|
||||
|
||||
if CGROUP_SCHED
|
||||
config FAIR_GROUP_SCHED
|
||||
bool "Group scheduling for SCHED_OTHER"
|
||||
depends on CGROUP_SCHED
|
||||
default CGROUP_SCHED
|
||||
|
||||
config RT_GROUP_SCHED
|
||||
bool "Group scheduling for SCHED_RR/FIFO"
|
||||
depends on EXPERIMENTAL
|
||||
depends on CGROUP_SCHED
|
||||
default n
|
||||
help
|
||||
This feature lets you explicitly allocate real CPU bandwidth
|
||||
to users or control groups (depending on the "Basis for grouping tasks"
|
||||
setting below). If enabled, it will also make it impossible to
|
||||
schedule realtime tasks for non-root users until you allocate
|
||||
realtime bandwidth for them.
|
||||
See Documentation/scheduler/sched-rt-group.txt for more information.
|
||||
|
||||
endif #CGROUP_SCHED
|
||||
|
||||
endif # CGROUPS
|
||||
|
||||
config MM_OWNER
|
||||
|
|
|
@ -197,16 +197,8 @@ static int __init ksysfs_init(void)
|
|||
goto group_exit;
|
||||
}
|
||||
|
||||
/* create the /sys/kernel/uids/ directory */
|
||||
error = uids_sysfs_init();
|
||||
if (error)
|
||||
goto notes_exit;
|
||||
|
||||
return 0;
|
||||
|
||||
notes_exit:
|
||||
if (notes_size > 0)
|
||||
sysfs_remove_bin_file(kernel_kobj, &notes_attr);
|
||||
group_exit:
|
||||
sysfs_remove_group(kernel_kobj, &kernel_attr_group);
|
||||
kset_exit:
|
||||
|
|
|
@ -101,7 +101,7 @@ static void create_kthread(struct kthread_create_info *create)
|
|||
*
|
||||
* Description: This helper function creates and names a kernel
|
||||
* thread. The thread will be stopped: use wake_up_process() to start
|
||||
* it. See also kthread_run(), kthread_create_on_cpu().
|
||||
* it. See also kthread_run().
|
||||
*
|
||||
* When woken, the thread will run @threadfn() with @data as its
|
||||
* argument. @threadfn() can either call do_exit() directly if it is a
|
||||
|
|
2125
kernel/sched.c
2125
kernel/sched.c
File diff suppressed because it is too large
Load Diff
|
@ -47,9 +47,7 @@ static int convert_prio(int prio)
|
|||
}
|
||||
|
||||
#define for_each_cpupri_active(array, idx) \
|
||||
for (idx = find_first_bit(array, CPUPRI_NR_PRIORITIES); \
|
||||
idx < CPUPRI_NR_PRIORITIES; \
|
||||
idx = find_next_bit(array, CPUPRI_NR_PRIORITIES, idx+1))
|
||||
for_each_bit(idx, array, CPUPRI_NR_PRIORITIES)
|
||||
|
||||
/**
|
||||
* cpupri_find - find the best (lowest-pri) CPU in the system
|
||||
|
|
1701
kernel/sched_fair.c
1701
kernel/sched_fair.c
File diff suppressed because it is too large
Load Diff
|
@ -44,24 +44,6 @@ static void put_prev_task_idle(struct rq *rq, struct task_struct *prev)
|
|||
{
|
||||
}
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
static unsigned long
|
||||
load_balance_idle(struct rq *this_rq, int this_cpu, struct rq *busiest,
|
||||
unsigned long max_load_move,
|
||||
struct sched_domain *sd, enum cpu_idle_type idle,
|
||||
int *all_pinned, int *this_best_prio)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
move_one_task_idle(struct rq *this_rq, int this_cpu, struct rq *busiest,
|
||||
struct sched_domain *sd, enum cpu_idle_type idle)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
static void task_tick_idle(struct rq *rq, struct task_struct *curr, int queued)
|
||||
{
|
||||
}
|
||||
|
@ -97,7 +79,7 @@ static void prio_changed_idle(struct rq *rq, struct task_struct *p,
|
|||
check_preempt_curr(rq, p, 0);
|
||||
}
|
||||
|
||||
unsigned int get_rr_interval_idle(struct rq *rq, struct task_struct *task)
|
||||
static unsigned int get_rr_interval_idle(struct rq *rq, struct task_struct *task)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
@ -119,9 +101,6 @@ static const struct sched_class idle_sched_class = {
|
|||
|
||||
#ifdef CONFIG_SMP
|
||||
.select_task_rq = select_task_rq_idle,
|
||||
|
||||
.load_balance = load_balance_idle,
|
||||
.move_one_task = move_one_task_idle,
|
||||
#endif
|
||||
|
||||
.set_curr_task = set_curr_task_idle,
|
||||
|
|
|
@ -194,17 +194,20 @@ static inline struct rt_rq *group_rt_rq(struct sched_rt_entity *rt_se)
|
|||
return rt_se->my_q;
|
||||
}
|
||||
|
||||
static void enqueue_rt_entity(struct sched_rt_entity *rt_se);
|
||||
static void enqueue_rt_entity(struct sched_rt_entity *rt_se, bool head);
|
||||
static void dequeue_rt_entity(struct sched_rt_entity *rt_se);
|
||||
|
||||
static void sched_rt_rq_enqueue(struct rt_rq *rt_rq)
|
||||
{
|
||||
int this_cpu = smp_processor_id();
|
||||
struct task_struct *curr = rq_of_rt_rq(rt_rq)->curr;
|
||||
struct sched_rt_entity *rt_se = rt_rq->rt_se;
|
||||
struct sched_rt_entity *rt_se;
|
||||
|
||||
rt_se = rt_rq->tg->rt_se[this_cpu];
|
||||
|
||||
if (rt_rq->rt_nr_running) {
|
||||
if (rt_se && !on_rt_rq(rt_se))
|
||||
enqueue_rt_entity(rt_se);
|
||||
enqueue_rt_entity(rt_se, false);
|
||||
if (rt_rq->highest_prio.curr < curr->prio)
|
||||
resched_task(curr);
|
||||
}
|
||||
|
@ -212,7 +215,10 @@ static void sched_rt_rq_enqueue(struct rt_rq *rt_rq)
|
|||
|
||||
static void sched_rt_rq_dequeue(struct rt_rq *rt_rq)
|
||||
{
|
||||
struct sched_rt_entity *rt_se = rt_rq->rt_se;
|
||||
int this_cpu = smp_processor_id();
|
||||
struct sched_rt_entity *rt_se;
|
||||
|
||||
rt_se = rt_rq->tg->rt_se[this_cpu];
|
||||
|
||||
if (rt_se && on_rt_rq(rt_se))
|
||||
dequeue_rt_entity(rt_se);
|
||||
|
@ -803,7 +809,7 @@ void dec_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
|
|||
dec_rt_group(rt_se, rt_rq);
|
||||
}
|
||||
|
||||
static void __enqueue_rt_entity(struct sched_rt_entity *rt_se)
|
||||
static void __enqueue_rt_entity(struct sched_rt_entity *rt_se, bool head)
|
||||
{
|
||||
struct rt_rq *rt_rq = rt_rq_of_se(rt_se);
|
||||
struct rt_prio_array *array = &rt_rq->active;
|
||||
|
@ -819,7 +825,10 @@ static void __enqueue_rt_entity(struct sched_rt_entity *rt_se)
|
|||
if (group_rq && (rt_rq_throttled(group_rq) || !group_rq->rt_nr_running))
|
||||
return;
|
||||
|
||||
list_add_tail(&rt_se->run_list, queue);
|
||||
if (head)
|
||||
list_add(&rt_se->run_list, queue);
|
||||
else
|
||||
list_add_tail(&rt_se->run_list, queue);
|
||||
__set_bit(rt_se_prio(rt_se), array->bitmap);
|
||||
|
||||
inc_rt_tasks(rt_se, rt_rq);
|
||||
|
@ -856,11 +865,11 @@ static void dequeue_rt_stack(struct sched_rt_entity *rt_se)
|
|||
}
|
||||
}
|
||||
|
||||
static void enqueue_rt_entity(struct sched_rt_entity *rt_se)
|
||||
static void enqueue_rt_entity(struct sched_rt_entity *rt_se, bool head)
|
||||
{
|
||||
dequeue_rt_stack(rt_se);
|
||||
for_each_sched_rt_entity(rt_se)
|
||||
__enqueue_rt_entity(rt_se);
|
||||
__enqueue_rt_entity(rt_se, head);
|
||||
}
|
||||
|
||||
static void dequeue_rt_entity(struct sched_rt_entity *rt_se)
|
||||
|
@ -871,21 +880,22 @@ static void dequeue_rt_entity(struct sched_rt_entity *rt_se)
|
|||
struct rt_rq *rt_rq = group_rt_rq(rt_se);
|
||||
|
||||
if (rt_rq && rt_rq->rt_nr_running)
|
||||
__enqueue_rt_entity(rt_se);
|
||||
__enqueue_rt_entity(rt_se, false);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Adding/removing a task to/from a priority array:
|
||||
*/
|
||||
static void enqueue_task_rt(struct rq *rq, struct task_struct *p, int wakeup)
|
||||
static void
|
||||
enqueue_task_rt(struct rq *rq, struct task_struct *p, int wakeup, bool head)
|
||||
{
|
||||
struct sched_rt_entity *rt_se = &p->rt;
|
||||
|
||||
if (wakeup)
|
||||
rt_se->timeout = 0;
|
||||
|
||||
enqueue_rt_entity(rt_se);
|
||||
enqueue_rt_entity(rt_se, head);
|
||||
|
||||
if (!task_current(rq, p) && p->rt.nr_cpus_allowed > 1)
|
||||
enqueue_pushable_task(rq, p);
|
||||
|
@ -1481,24 +1491,6 @@ static void task_woken_rt(struct rq *rq, struct task_struct *p)
|
|||
push_rt_tasks(rq);
|
||||
}
|
||||
|
||||
static unsigned long
|
||||
load_balance_rt(struct rq *this_rq, int this_cpu, struct rq *busiest,
|
||||
unsigned long max_load_move,
|
||||
struct sched_domain *sd, enum cpu_idle_type idle,
|
||||
int *all_pinned, int *this_best_prio)
|
||||
{
|
||||
/* don't touch RT tasks */
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
move_one_task_rt(struct rq *this_rq, int this_cpu, struct rq *busiest,
|
||||
struct sched_domain *sd, enum cpu_idle_type idle)
|
||||
{
|
||||
/* don't touch RT tasks */
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void set_cpus_allowed_rt(struct task_struct *p,
|
||||
const struct cpumask *new_mask)
|
||||
{
|
||||
|
@ -1721,7 +1713,7 @@ static void set_curr_task_rt(struct rq *rq)
|
|||
dequeue_pushable_task(rq, p);
|
||||
}
|
||||
|
||||
unsigned int get_rr_interval_rt(struct rq *rq, struct task_struct *task)
|
||||
static unsigned int get_rr_interval_rt(struct rq *rq, struct task_struct *task)
|
||||
{
|
||||
/*
|
||||
* Time slice is 0 for SCHED_FIFO tasks
|
||||
|
@ -1746,8 +1738,6 @@ static const struct sched_class rt_sched_class = {
|
|||
#ifdef CONFIG_SMP
|
||||
.select_task_rq = select_task_rq_rt,
|
||||
|
||||
.load_balance = load_balance_rt,
|
||||
.move_one_task = move_one_task_rt,
|
||||
.set_cpus_allowed = set_cpus_allowed_rt,
|
||||
.rq_online = rq_online_rt,
|
||||
.rq_offline = rq_offline_rt,
|
||||
|
|
|
@ -571,11 +571,6 @@ static int set_user(struct cred *new)
|
|||
if (!new_user)
|
||||
return -EAGAIN;
|
||||
|
||||
if (!task_can_switch_user(new_user, current)) {
|
||||
free_uid(new_user);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (atomic_read(&new_user->processes) >=
|
||||
current->signal->rlim[RLIMIT_NPROC].rlim_cur &&
|
||||
new_user != INIT_USER) {
|
||||
|
|
305
kernel/user.c
305
kernel/user.c
|
@ -56,9 +56,6 @@ struct user_struct root_user = {
|
|||
.sigpending = ATOMIC_INIT(0),
|
||||
.locked_shm = 0,
|
||||
.user_ns = &init_user_ns,
|
||||
#ifdef CONFIG_USER_SCHED
|
||||
.tg = &init_task_group,
|
||||
#endif
|
||||
};
|
||||
|
||||
/*
|
||||
|
@ -75,268 +72,6 @@ static void uid_hash_remove(struct user_struct *up)
|
|||
put_user_ns(up->user_ns);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_USER_SCHED
|
||||
|
||||
static void sched_destroy_user(struct user_struct *up)
|
||||
{
|
||||
sched_destroy_group(up->tg);
|
||||
}
|
||||
|
||||
static int sched_create_user(struct user_struct *up)
|
||||
{
|
||||
int rc = 0;
|
||||
|
||||
up->tg = sched_create_group(&root_task_group);
|
||||
if (IS_ERR(up->tg))
|
||||
rc = -ENOMEM;
|
||||
|
||||
set_tg_uid(up);
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
#else /* CONFIG_USER_SCHED */
|
||||
|
||||
static void sched_destroy_user(struct user_struct *up) { }
|
||||
static int sched_create_user(struct user_struct *up) { return 0; }
|
||||
|
||||
#endif /* CONFIG_USER_SCHED */
|
||||
|
||||
#if defined(CONFIG_USER_SCHED) && defined(CONFIG_SYSFS)
|
||||
|
||||
static struct user_struct *uid_hash_find(uid_t uid, struct hlist_head *hashent)
|
||||
{
|
||||
struct user_struct *user;
|
||||
struct hlist_node *h;
|
||||
|
||||
hlist_for_each_entry(user, h, hashent, uidhash_node) {
|
||||
if (user->uid == uid) {
|
||||
/* possibly resurrect an "almost deleted" object */
|
||||
if (atomic_inc_return(&user->__count) == 1)
|
||||
cancel_delayed_work(&user->work);
|
||||
return user;
|
||||
}
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static struct kset *uids_kset; /* represents the /sys/kernel/uids/ directory */
|
||||
static DEFINE_MUTEX(uids_mutex);
|
||||
|
||||
static inline void uids_mutex_lock(void)
|
||||
{
|
||||
mutex_lock(&uids_mutex);
|
||||
}
|
||||
|
||||
static inline void uids_mutex_unlock(void)
|
||||
{
|
||||
mutex_unlock(&uids_mutex);
|
||||
}
|
||||
|
||||
/* uid directory attributes */
|
||||
#ifdef CONFIG_FAIR_GROUP_SCHED
|
||||
static ssize_t cpu_shares_show(struct kobject *kobj,
|
||||
struct kobj_attribute *attr,
|
||||
char *buf)
|
||||
{
|
||||
struct user_struct *up = container_of(kobj, struct user_struct, kobj);
|
||||
|
||||
return sprintf(buf, "%lu\n", sched_group_shares(up->tg));
|
||||
}
|
||||
|
||||
static ssize_t cpu_shares_store(struct kobject *kobj,
|
||||
struct kobj_attribute *attr,
|
||||
const char *buf, size_t size)
|
||||
{
|
||||
struct user_struct *up = container_of(kobj, struct user_struct, kobj);
|
||||
unsigned long shares;
|
||||
int rc;
|
||||
|
||||
sscanf(buf, "%lu", &shares);
|
||||
|
||||
rc = sched_group_set_shares(up->tg, shares);
|
||||
|
||||
return (rc ? rc : size);
|
||||
}
|
||||
|
||||
static struct kobj_attribute cpu_share_attr =
|
||||
__ATTR(cpu_share, 0644, cpu_shares_show, cpu_shares_store);
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_RT_GROUP_SCHED
|
||||
static ssize_t cpu_rt_runtime_show(struct kobject *kobj,
|
||||
struct kobj_attribute *attr,
|
||||
char *buf)
|
||||
{
|
||||
struct user_struct *up = container_of(kobj, struct user_struct, kobj);
|
||||
|
||||
return sprintf(buf, "%ld\n", sched_group_rt_runtime(up->tg));
|
||||
}
|
||||
|
||||
static ssize_t cpu_rt_runtime_store(struct kobject *kobj,
|
||||
struct kobj_attribute *attr,
|
||||
const char *buf, size_t size)
|
||||
{
|
||||
struct user_struct *up = container_of(kobj, struct user_struct, kobj);
|
||||
unsigned long rt_runtime;
|
||||
int rc;
|
||||
|
||||
sscanf(buf, "%ld", &rt_runtime);
|
||||
|
||||
rc = sched_group_set_rt_runtime(up->tg, rt_runtime);
|
||||
|
||||
return (rc ? rc : size);
|
||||
}
|
||||
|
||||
static struct kobj_attribute cpu_rt_runtime_attr =
|
||||
__ATTR(cpu_rt_runtime, 0644, cpu_rt_runtime_show, cpu_rt_runtime_store);
|
||||
|
||||
static ssize_t cpu_rt_period_show(struct kobject *kobj,
|
||||
struct kobj_attribute *attr,
|
||||
char *buf)
|
||||
{
|
||||
struct user_struct *up = container_of(kobj, struct user_struct, kobj);
|
||||
|
||||
return sprintf(buf, "%lu\n", sched_group_rt_period(up->tg));
|
||||
}
|
||||
|
||||
static ssize_t cpu_rt_period_store(struct kobject *kobj,
|
||||
struct kobj_attribute *attr,
|
||||
const char *buf, size_t size)
|
||||
{
|
||||
struct user_struct *up = container_of(kobj, struct user_struct, kobj);
|
||||
unsigned long rt_period;
|
||||
int rc;
|
||||
|
||||
sscanf(buf, "%lu", &rt_period);
|
||||
|
||||
rc = sched_group_set_rt_period(up->tg, rt_period);
|
||||
|
||||
return (rc ? rc : size);
|
||||
}
|
||||
|
||||
static struct kobj_attribute cpu_rt_period_attr =
|
||||
__ATTR(cpu_rt_period, 0644, cpu_rt_period_show, cpu_rt_period_store);
|
||||
#endif
|
||||
|
||||
/* default attributes per uid directory */
|
||||
static struct attribute *uids_attributes[] = {
|
||||
#ifdef CONFIG_FAIR_GROUP_SCHED
|
||||
&cpu_share_attr.attr,
|
||||
#endif
|
||||
#ifdef CONFIG_RT_GROUP_SCHED
|
||||
&cpu_rt_runtime_attr.attr,
|
||||
&cpu_rt_period_attr.attr,
|
||||
#endif
|
||||
NULL
|
||||
};
|
||||
|
||||
/* the lifetime of user_struct is not managed by the core (now) */
|
||||
static void uids_release(struct kobject *kobj)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
static struct kobj_type uids_ktype = {
|
||||
.sysfs_ops = &kobj_sysfs_ops,
|
||||
.default_attrs = uids_attributes,
|
||||
.release = uids_release,
|
||||
};
|
||||
|
||||
/*
|
||||
* Create /sys/kernel/uids/<uid>/cpu_share file for this user
|
||||
* We do not create this file for users in a user namespace (until
|
||||
* sysfs tagging is implemented).
|
||||
*
|
||||
* See Documentation/scheduler/sched-design-CFS.txt for ramifications.
|
||||
*/
|
||||
static int uids_user_create(struct user_struct *up)
|
||||
{
|
||||
struct kobject *kobj = &up->kobj;
|
||||
int error;
|
||||
|
||||
memset(kobj, 0, sizeof(struct kobject));
|
||||
if (up->user_ns != &init_user_ns)
|
||||
return 0;
|
||||
kobj->kset = uids_kset;
|
||||
error = kobject_init_and_add(kobj, &uids_ktype, NULL, "%d", up->uid);
|
||||
if (error) {
|
||||
kobject_put(kobj);
|
||||
goto done;
|
||||
}
|
||||
|
||||
kobject_uevent(kobj, KOBJ_ADD);
|
||||
done:
|
||||
return error;
|
||||
}
|
||||
|
||||
/* create these entries in sysfs:
|
||||
* "/sys/kernel/uids" directory
|
||||
* "/sys/kernel/uids/0" directory (for root user)
|
||||
* "/sys/kernel/uids/0/cpu_share" file (for root user)
|
||||
*/
|
||||
int __init uids_sysfs_init(void)
|
||||
{
|
||||
uids_kset = kset_create_and_add("uids", NULL, kernel_kobj);
|
||||
if (!uids_kset)
|
||||
return -ENOMEM;
|
||||
|
||||
return uids_user_create(&root_user);
|
||||
}
|
||||
|
||||
/* delayed work function to remove sysfs directory for a user and free up
|
||||
* corresponding structures.
|
||||
*/
|
||||
static void cleanup_user_struct(struct work_struct *w)
|
||||
{
|
||||
struct user_struct *up = container_of(w, struct user_struct, work.work);
|
||||
unsigned long flags;
|
||||
int remove_user = 0;
|
||||
|
||||
/* Make uid_hash_remove() + sysfs_remove_file() + kobject_del()
|
||||
* atomic.
|
||||
*/
|
||||
uids_mutex_lock();
|
||||
|
||||
spin_lock_irqsave(&uidhash_lock, flags);
|
||||
if (atomic_read(&up->__count) == 0) {
|
||||
uid_hash_remove(up);
|
||||
remove_user = 1;
|
||||
}
|
||||
spin_unlock_irqrestore(&uidhash_lock, flags);
|
||||
|
||||
if (!remove_user)
|
||||
goto done;
|
||||
|
||||
if (up->user_ns == &init_user_ns) {
|
||||
kobject_uevent(&up->kobj, KOBJ_REMOVE);
|
||||
kobject_del(&up->kobj);
|
||||
kobject_put(&up->kobj);
|
||||
}
|
||||
|
||||
sched_destroy_user(up);
|
||||
key_put(up->uid_keyring);
|
||||
key_put(up->session_keyring);
|
||||
kmem_cache_free(uid_cachep, up);
|
||||
|
||||
done:
|
||||
uids_mutex_unlock();
|
||||
}
|
||||
|
||||
/* IRQs are disabled and uidhash_lock is held upon function entry.
|
||||
* IRQ state (as stored in flags) is restored and uidhash_lock released
|
||||
* upon function exit.
|
||||
*/
|
||||
static void free_user(struct user_struct *up, unsigned long flags)
|
||||
{
|
||||
INIT_DELAYED_WORK(&up->work, cleanup_user_struct);
|
||||
schedule_delayed_work(&up->work, msecs_to_jiffies(1000));
|
||||
spin_unlock_irqrestore(&uidhash_lock, flags);
|
||||
}
|
||||
|
||||
#else /* CONFIG_USER_SCHED && CONFIG_SYSFS */
|
||||
|
||||
static struct user_struct *uid_hash_find(uid_t uid, struct hlist_head *hashent)
|
||||
{
|
||||
struct user_struct *user;
|
||||
|
@ -352,11 +87,6 @@ static struct user_struct *uid_hash_find(uid_t uid, struct hlist_head *hashent)
|
|||
return NULL;
|
||||
}
|
||||
|
||||
int uids_sysfs_init(void) { return 0; }
|
||||
static inline int uids_user_create(struct user_struct *up) { return 0; }
|
||||
static inline void uids_mutex_lock(void) { }
|
||||
static inline void uids_mutex_unlock(void) { }
|
||||
|
||||
/* IRQs are disabled and uidhash_lock is held upon function entry.
|
||||
* IRQ state (as stored in flags) is restored and uidhash_lock released
|
||||
* upon function exit.
|
||||
|
@ -365,32 +95,11 @@ static void free_user(struct user_struct *up, unsigned long flags)
|
|||
{
|
||||
uid_hash_remove(up);
|
||||
spin_unlock_irqrestore(&uidhash_lock, flags);
|
||||
sched_destroy_user(up);
|
||||
key_put(up->uid_keyring);
|
||||
key_put(up->session_keyring);
|
||||
kmem_cache_free(uid_cachep, up);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if defined(CONFIG_RT_GROUP_SCHED) && defined(CONFIG_USER_SCHED)
|
||||
/*
|
||||
* We need to check if a setuid can take place. This function should be called
|
||||
* before successfully completing the setuid.
|
||||
*/
|
||||
int task_can_switch_user(struct user_struct *up, struct task_struct *tsk)
|
||||
{
|
||||
|
||||
return sched_rt_can_attach(up->tg, tsk);
|
||||
|
||||
}
|
||||
#else
|
||||
int task_can_switch_user(struct user_struct *up, struct task_struct *tsk)
|
||||
{
|
||||
return 1;
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Locate the user_struct for the passed UID. If found, take a ref on it. The
|
||||
* caller must undo that ref with free_uid().
|
||||
|
@ -431,8 +140,6 @@ struct user_struct *alloc_uid(struct user_namespace *ns, uid_t uid)
|
|||
/* Make uid_hash_find() + uids_user_create() + uid_hash_insert()
|
||||
* atomic.
|
||||
*/
|
||||
uids_mutex_lock();
|
||||
|
||||
spin_lock_irq(&uidhash_lock);
|
||||
up = uid_hash_find(uid, hashent);
|
||||
spin_unlock_irq(&uidhash_lock);
|
||||
|
@ -445,14 +152,8 @@ struct user_struct *alloc_uid(struct user_namespace *ns, uid_t uid)
|
|||
new->uid = uid;
|
||||
atomic_set(&new->__count, 1);
|
||||
|
||||
if (sched_create_user(new) < 0)
|
||||
goto out_free_user;
|
||||
|
||||
new->user_ns = get_user_ns(ns);
|
||||
|
||||
if (uids_user_create(new))
|
||||
goto out_destoy_sched;
|
||||
|
||||
/*
|
||||
* Before adding this, check whether we raced
|
||||
* on adding the same user already..
|
||||
|
@ -475,17 +176,11 @@ struct user_struct *alloc_uid(struct user_namespace *ns, uid_t uid)
|
|||
spin_unlock_irq(&uidhash_lock);
|
||||
}
|
||||
|
||||
uids_mutex_unlock();
|
||||
|
||||
return up;
|
||||
|
||||
out_destoy_sched:
|
||||
sched_destroy_user(new);
|
||||
put_user_ns(new->user_ns);
|
||||
out_free_user:
|
||||
kmem_cache_free(uid_cachep, new);
|
||||
out_unlock:
|
||||
uids_mutex_unlock();
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue