/dev/mem: Revoke mappings when a driver claims the region
Close the hole where a /dev/mem mapping can be held across a kernel driver's takeover of a given address range. Commit 90a545e981
("restrict /dev/mem to idle io memory ranges") introduced CONFIG_IO_STRICT_DEVMEM with the goal of protecting the kernel against scenarios where a /dev/mem user tramples memory that a kernel driver owns. However, this protection only prevents *new* read(), write() and mmap() requests. Mappings established prior to the driver calling request_mem_region() are left alone. Especially with persistent memory, and the core kernel metadata that is stored there, there are plentiful scenarios for a /dev/mem user to violate the expectations of the driver and cause amplified damage. Teach request_mem_region() to find and shoot down active /dev/mem mappings of the range that it believes it has successfully claimed for the exclusive use of the driver. Effectively a driver call to request_mem_region() becomes a hole-punch on the /dev/mem device. The typical usage of unmap_mapping_range() is part of truncate_pagecache() to punch a hole in a file, but in this case the implementation is only doing the "first half" of a hole punch. Namely it is just evacuating current established mappings of the "hole", and it relies on the fact that /dev/mem establishes mappings in terms of absolute physical address offsets. Once existing mmap users are invalidated they can attempt to re-establish the mapping, or attempt to continue issuing read(2) / write(2) to the invalidated extent, but they will then be subject to the CONFIG_IO_STRICT_DEVMEM checking that can block those subsequent accesses. Cc: Arnd Bergmann <arnd@arndb.de> Cc: Ingo Molnar <mingo@redhat.com> Cc: Kees Cook <keescook@chromium.org> Cc: Matthew Wilcox <willy@infradead.org> Cc: Russell King <linux@arm.linux.org.uk> Cc: Andrew Morton <akpm@linux-foundation.org> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Fixes: 90a545e981
("restrict /dev/mem to idle io memory ranges") Signed-off-by: Dan Williams <dan.j.williams@intel.com> Reviewed-by: Kees Cook <keescook@chromium.org> Link: https://lore.kernel.org/r/159009507306.847224.8502634072429766747.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
This commit is contained in:
parent
03358b0f7b
commit
3234ac664a
|
@ -31,11 +31,15 @@
|
|||
#include <linux/uio.h>
#include <linux/uaccess.h>
#include <linux/security.h>
#include <linux/pseudo_fs.h>
#include <uapi/linux/magic.h>
#include <linux/mount.h>
#include <asm/barrier.h>
|
||||
|
||||
#ifdef CONFIG_IA64
|
||||
# include <linux/efi.h>
|
||||
#endif
|
||||
|
||||
#define DEVMEM_MINOR 1
|
||||
#define DEVPORT_MINOR 4
|
||||
|
||||
static inline unsigned long size_inside_page(unsigned long start,
|
||||
|
@ -805,12 +809,64 @@ static loff_t memory_lseek(struct file *file, loff_t offset, int orig)
|
|||
return ret;
|
||||
}
|
||||
|
||||
/*
 * Inode whose i_mapping open_port() installs on every /dev/mem open.
 * Remains NULL until devmem_init_inode() has published it.
 */
static struct inode *devmem_inode;
|
||||
|
||||
#ifdef CONFIG_IO_STRICT_DEVMEM
|
||||
void revoke_devmem(struct resource *res)
|
||||
{
|
||||
struct inode *inode = READ_ONCE(devmem_inode);
|
||||
|
||||
/*
|
||||
* Check that the initialization has completed. Losing the race
|
||||
* is ok because it means drivers are claiming resources before
|
||||
* the fs_initcall level of init and prevent /dev/mem from
|
||||
* establishing mappings.
|
||||
*/
|
||||
if (!inode)
|
||||
return;
|
||||
|
||||
/*
|
||||
* The expectation is that the driver has successfully marked
|
||||
* the resource busy by this point, so devmem_is_allowed()
|
||||
* should start returning false, however for performance this
|
||||
* does not iterate the entire resource range.
|
||||
*/
|
||||
if (devmem_is_allowed(PHYS_PFN(res->start)) &&
|
||||
devmem_is_allowed(PHYS_PFN(res->end))) {
|
||||
/*
|
||||
* *cringe* iomem=relaxed says "go ahead, what's the
|
||||
* worst that can happen?"
|
||||
*/
|
||||
return;
|
||||
}
|
||||
|
||||
unmap_mapping_range(inode->i_mapping, res->start, resource_size(res), 1);
|
||||
}
|
||||
#endif
|
||||
|
||||
static int open_port(struct inode *inode, struct file *filp)
|
||||
{
|
||||
int rc;
|
||||
|
||||
if (!capable(CAP_SYS_RAWIO))
|
||||
return -EPERM;
|
||||
|
||||
return security_locked_down(LOCKDOWN_DEV_MEM);
|
||||
rc = security_locked_down(LOCKDOWN_DEV_MEM);
|
||||
if (rc)
|
||||
return rc;
|
||||
|
||||
if (iminor(inode) != DEVMEM_MINOR)
|
||||
return 0;
|
||||
|
||||
/*
|
||||
* Use a unified address space to have a single point to manage
|
||||
* revocations when drivers want to take over a /dev/mem mapped
|
||||
* range.
|
||||
*/
|
||||
inode->i_mapping = devmem_inode->i_mapping;
|
||||
filp->f_mapping = inode->i_mapping;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* zero_lseek is an alias for null_lseek. */
#define zero_lseek null_lseek
|
||||
|
@ -885,7 +941,7 @@ static const struct memdev {
|
|||
fmode_t fmode;
|
||||
} devlist[] = {
|
||||
#ifdef CONFIG_DEVMEM
|
||||
[1] = { "mem", 0, &mem_fops, FMODE_UNSIGNED_OFFSET },
|
||||
[DEVMEM_MINOR] = { "mem", 0, &mem_fops, FMODE_UNSIGNED_OFFSET },
|
||||
#endif
|
||||
#ifdef CONFIG_DEVKMEM
|
||||
[2] = { "kmem", 0, &kmem_fops, FMODE_UNSIGNED_OFFSET },
|
||||
|
@ -939,6 +995,45 @@ static char *mem_devnode(struct device *dev, umode_t *mode)
|
|||
|
||||
/* Device class handed to device_create() by chr_dev_init(). */
static struct class *mem_class;
|
||||
|
||||
static int devmem_fs_init_fs_context(struct fs_context *fc)
|
||||
{
|
||||
return init_pseudo(fc, DEVMEM_MAGIC) ? 0 : -ENOMEM;
|
||||
}
|
||||
|
||||
static struct file_system_type devmem_fs_type = {
|
||||
.name = "devmem",
|
||||
.owner = THIS_MODULE,
|
||||
.init_fs_context = devmem_fs_init_fs_context,
|
||||
.kill_sb = kill_anon_super,
|
||||
};
|
||||
|
||||
static int devmem_init_inode(void)
|
||||
{
|
||||
static struct vfsmount *devmem_vfs_mount;
|
||||
static int devmem_fs_cnt;
|
||||
struct inode *inode;
|
||||
int rc;
|
||||
|
||||
rc = simple_pin_fs(&devmem_fs_type, &devmem_vfs_mount, &devmem_fs_cnt);
|
||||
if (rc < 0) {
|
||||
pr_err("Cannot mount /dev/mem pseudo filesystem: %d\n", rc);
|
||||
return rc;
|
||||
}
|
||||
|
||||
inode = alloc_anon_inode(devmem_vfs_mount->mnt_sb);
|
||||
if (IS_ERR(inode)) {
|
||||
rc = PTR_ERR(inode);
|
||||
pr_err("Cannot allocate inode for /dev/mem: %d\n", rc);
|
||||
simple_release_fs(&devmem_vfs_mount, &devmem_fs_cnt);
|
||||
return rc;
|
||||
}
|
||||
|
||||
/* publish /dev/mem initialized */
|
||||
WRITE_ONCE(devmem_inode, inode);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int __init chr_dev_init(void)
|
||||
{
|
||||
int minor;
|
||||
|
@ -960,6 +1055,8 @@ static int __init chr_dev_init(void)
|
|||
*/
|
||||
if ((minor == DEVPORT_MINOR) && !arch_has_dev_port())
|
||||
continue;
|
||||
if ((minor == DEVMEM_MINOR) && devmem_init_inode() != 0)
|
||||
continue;
|
||||
|
||||
device_create(mem_class, NULL, MKDEV(MEM_MAJOR, minor),
|
||||
NULL, devlist[minor].name);
|
||||
|
|
|
@ -301,5 +301,11 @@ struct resource *devm_request_free_mem_region(struct device *dev,
|
|||
struct resource *request_free_mem_region(struct resource *base,
|
||||
unsigned long size, const char *name);
|
||||
|
||||
/*
 * revoke_devmem() is only implemented when CONFIG_IO_STRICT_DEVMEM is
 * set; otherwise callers get an empty inline stub.
 */
#ifdef CONFIG_IO_STRICT_DEVMEM
void revoke_devmem(struct resource *res);
#else
static inline void revoke_devmem(struct resource *res) { }
#endif
|
||||
|
||||
#endif /* __ASSEMBLY__ */
|
||||
#endif /* _LINUX_IOPORT_H */
|
||||
|
|
|
@ -94,6 +94,7 @@
|
|||
#define BALLOON_KVM_MAGIC 0x13661366
|
||||
#define ZSMALLOC_MAGIC 0x58295829
|
||||
#define DMA_BUF_MAGIC 0x444d4142 /* "DMAB" */
|
||||
#define DEVMEM_MAGIC 0x454d444d /* ASCII "EMDM", i.e. "DMEM" with its 16-bit halves swapped */
|
||||
#define Z3FOLD_MAGIC 0x33
|
||||
#define PPC_CMM_MAGIC 0xc7571590
|
||||
|
||||
|
|
|
@ -1126,6 +1126,7 @@ struct resource * __request_region(struct resource *parent,
|
|||
{
|
||||
DECLARE_WAITQUEUE(wait, current);
|
||||
struct resource *res = alloc_resource(GFP_KERNEL);
|
||||
struct resource *orig_parent = parent;
|
||||
|
||||
if (!res)
|
||||
return NULL;
|
||||
|
@ -1176,6 +1177,10 @@ struct resource * __request_region(struct resource *parent,
|
|||
break;
|
||||
}
|
||||
write_unlock(&resource_lock);
|
||||
|
||||
if (res && orig_parent == &iomem_resource)
|
||||
revoke_devmem(res);
|
||||
|
||||
return res;
|
||||
}
|
||||
EXPORT_SYMBOL(__request_region);
|
||||
|
|
Loading…
Reference in New Issue