fs/ioctl: nvidia-smi process and memory display in docker
Process information cannot be displayed when running the nvidia-smi command in a container. The reason is that nvidia-smi uses the host pid to retrieve process information, but the host pid does not exist in the container, so nvidia-smi gets nothing when using the host pid. Since we trap the nvidia-smi ioctl command to replace the host pid with the guest pid, and the nv_cmd_id and pad have changed, update them according to the nvidia module's version. Fixes: fc35f2a69b37 ("ioctl: trap nvidia-smi command") Signed-off-by: Huang Cun <cunhuang@tencent.com> Reviewed-by: Jianping Liu <frankjpliu@tencent.com> Reviewed-by: Yongliang Gao <leonylgao@tencent.com> Signed-off-by: Jianping Liu <frankjpliu@tencent.com>
This commit is contained in:
parent
c77c209977
commit
dba2ad55b1
69
fs/ioctl.c
69
fs/ioctl.c
|
@ -705,6 +705,8 @@ int do_vfs_ioctl(struct file *filp, unsigned int fd, unsigned int cmd,
|
|||
/* Task comm string that ksys_ioctl compares against current's comm before trapping. */
#define NV_CMD "nvidia-smi"
|
||||
/* ioctl command number that ksys_ioctl matches before inspecting the request. */
unsigned int nv_ioctl_id = 0xc020462a;
|
||||
/*
 * Sub-command id compared with nv_cmd.cmd in ksys_ioctl.  This is only the
 * initial value: get_pad_num_set_cmd_by_version() overwrites it according to
 * the driver version.
 */
unsigned int nv_cmd_id_get_gram = 0x800203;
|
||||
/* Pad count returned by get_pad_num_set_cmd_by_version() for the 525.x layout. */
#define V525_105_17_PAD_NUM_17 17
|
||||
/* Pad count returned by get_pad_num_set_cmd_by_version() for the original 440.33.01 layout. */
#define V440_33_01_PAD_NUM_9 9
|
||||
|
||||
struct nv_get_pid_count {
|
||||
u32 count;
|
||||
|
@ -712,7 +714,7 @@ struct nv_get_pid_count {
|
|||
|
||||
struct pids {
|
||||
u32 pid;
|
||||
u32 pad[9];
|
||||
u32 pad[0];
|
||||
};
|
||||
|
||||
struct nv_get_gram {
|
||||
|
@ -730,6 +732,47 @@ struct nv_cmd {
|
|||
|
||||
/*
 * Switch for the nvidia-smi ioctl trap: ksys_ioctl only runs the trap when
 * this equals 1.  Off (0) by default.
 * NOTE(review): presumably exposed through a sysctl entry - confirm where it
 * is registered.
 */
unsigned int sysctl_nvidia_smi_trap = 0;
|
||||
|
||||
static int get_pad_num_set_cmd_by_version(const char *version)
|
||||
{
|
||||
int pad_num = 0;
|
||||
|
||||
/* version 440.33.01 pad num is 9? */
|
||||
if (strncmp(version, "525.", 4) >= 0) {
|
||||
/* 525.105.17 have been cheched */
|
||||
nv_cmd_id_get_gram = 0x2080018E;
|
||||
pad_num = V525_105_17_PAD_NUM_17;
|
||||
} else {
|
||||
/* original supported version: 440.33.01 */
|
||||
nv_cmd_id_get_gram = 0x800203;
|
||||
pad_num = V440_33_01_PAD_NUM_9;
|
||||
}
|
||||
return pad_num;
|
||||
}
|
||||
|
||||
static int read_nvidia_version(char *buf, int len)
|
||||
{
|
||||
struct file *file;
|
||||
ssize_t bytes_read;
|
||||
loff_t pos = 0;
|
||||
|
||||
file = filp_open("/sys/module/nvidia/version", O_RDONLY, 0);
|
||||
if (IS_ERR(file)) {
|
||||
pr_info("Cannot open /sys/module/nvidia/version\n");
|
||||
return PTR_ERR(file);
|
||||
}
|
||||
|
||||
bytes_read = kernel_read(file, buf, len - 1, &pos);
|
||||
if (bytes_read < 0) {
|
||||
pr_info("Cannot read nvidia version, ret=%zd\n", bytes_read);
|
||||
filp_close(file, NULL);
|
||||
return bytes_read;
|
||||
}
|
||||
|
||||
buf[bytes_read] = '\0';
|
||||
filp_close(file, NULL);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int convert_pid_to_container(int pid_in_host)
|
||||
{
|
||||
struct pid *ppid;
|
||||
|
@ -793,15 +836,16 @@ static int get_nv_pid_count(struct nv_cmd *nv_cmd)
|
|||
return count;
|
||||
}
|
||||
|
||||
static void change_nv_pid(struct nv_cmd *nv_cmd, int count)
|
||||
static void change_nv_pid(struct nv_cmd *nv_cmd, int count, int pad_num)
|
||||
{
|
||||
int len;
|
||||
struct nv_get_gram *get_gram;
|
||||
int n, i;
|
||||
u32 guest_pid;
|
||||
bool write;
|
||||
int index;
|
||||
|
||||
len = sizeof(struct nv_get_gram) + sizeof(struct pids) * count;
|
||||
len = sizeof(struct nv_get_gram) + (sizeof(u32) * (1 + pad_num))*count;
|
||||
get_gram = (struct nv_get_gram *)kmalloc(len, GFP_KERNEL);
|
||||
if (!get_gram) {
|
||||
printk(KERN_ERR "change nv pid: malloc nv get gram failed\n");
|
||||
|
@ -814,15 +858,16 @@ static void change_nv_pid(struct nv_cmd *nv_cmd, int count)
|
|||
i = 0;
|
||||
write = false;
|
||||
while (i < get_gram->count) {
|
||||
guest_pid = convert_pid_to_container(get_gram->pid[i].pid);
|
||||
index = i * (pad_num + 1);
|
||||
guest_pid = convert_pid_to_container(get_gram->pid[index].pid);
|
||||
if (guest_pid > 0) {
|
||||
/* If process run in other container then geust_pid will return 0. Then we should
|
||||
* not change the pid
|
||||
*/
|
||||
printk(KERN_INFO "change nv pid: host pid:%d, container pid:%d\n", get_gram->pid[i].pid,
|
||||
guest_pid);
|
||||
pr_info("change nv pid: host pid:%d, container pid:%d\n",
|
||||
get_gram->pid[index].pid, guest_pid);
|
||||
write = true;
|
||||
get_gram->pid[i].pid = guest_pid;
|
||||
get_gram->pid[index].pid = guest_pid;
|
||||
}
|
||||
i++;
|
||||
}
|
||||
|
@ -843,7 +888,7 @@ int ksys_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg)
|
|||
int error;
|
||||
struct fd f = fdget(fd);
|
||||
struct nv_cmd nv_cmd;
|
||||
int count;
|
||||
int count, pad_num = 0;
|
||||
|
||||
if (!f.file)
|
||||
return -EBADF;
|
||||
|
@ -855,13 +900,19 @@ int ksys_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg)
|
|||
if ((sysctl_nvidia_smi_trap == 1) &&
|
||||
(task_active_pid_ns(current)->level) && (nv_ioctl_id == cmd)) {
|
||||
char buf[sizeof(current->comm)];
|
||||
char version[20] = {0};
|
||||
|
||||
get_task_comm(buf, current);
|
||||
if (strcmp(buf, NV_CMD) == 0 &&
|
||||
read_nvidia_version(version, 20) == 0)
|
||||
pad_num = get_pad_num_set_cmd_by_version(version);
|
||||
if (pad_num == 0)
|
||||
return error;
|
||||
if ((strcmp(buf, NV_CMD) == 0) && (get_nv_cmd(arg, &nv_cmd) == 0) &&
|
||||
(nv_cmd.cmd == nv_cmd_id_get_gram)) {
|
||||
count = get_nv_pid_count(&nv_cmd);
|
||||
if (count > 0)
|
||||
change_nv_pid(&nv_cmd, count);
|
||||
change_nv_pid(&nv_cmd, count, pad_num);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue