s390/vdso: implement generic vdso time namespace support

Implement generic vdso time namespace support which also enables time
namespaces for s390. This is quite similar to what arm64 has.

Reviewed-by: Alexander Gordeev <agordeev@linux.ibm.com>
Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
This commit is contained in:
Heiko Carstens 2021-02-05 16:19:32 +01:00 committed by Vasily Gorbik
parent 1ba2d6c0fd
commit eeab78b05d
6 changed files with 110 additions and 8 deletions

View File

@ -129,6 +129,7 @@ config S390
select GENERIC_PTDUMP
select GENERIC_SMP_IDLE_THREAD
select GENERIC_TIME_VSYSCALL
select GENERIC_VDSO_TIME_NS
select HAVE_ALIGNED_STRUCT_PAGE if SLUB
select HAVE_ARCH_AUDITSYSCALL
select HAVE_ARCH_JUMP_LABEL

View File

@ -3,6 +3,7 @@ CONFIG_NO_HZ_IDLE=y
CONFIG_HIGH_RES_TIMERS=y
# CONFIG_CPU_ISOLATION is not set
# CONFIG_UTS_NS is not set
# CONFIG_TIME_NS is not set
# CONFIG_PID_NS is not set
# CONFIG_NET_NS is not set
CONFIG_BLK_DEV_INITRD=y

View File

@ -7,6 +7,8 @@
/* Default link address for the vDSO */
#define VDSO64_LBASE 0
#define __VVAR_PAGES 2
#define VDSO_VERSION_STRING LINUX_2.6.29
#ifndef __ASSEMBLY__

View File

@ -67,4 +67,11 @@ long clock_getres_fallback(clockid_t clkid, struct __kernel_timespec *ts)
return r2;
}
#ifdef CONFIG_TIME_NS
static __always_inline const struct vdso_data *__arch_get_timens_vdso_data(void)
{
return _timens_data;
}
#endif
#endif

View File

@ -15,12 +15,15 @@
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/smp.h>
#include <linux/time_namespace.h>
#include <vdso/datapage.h>
#include <asm/vdso.h>
extern char vdso64_start[], vdso64_end[];
static unsigned int vdso_pages;
static struct vm_special_mapping vvar_mapping;
static union {
struct vdso_data data[CS_BASES];
u8 page[PAGE_SIZE];
@ -28,6 +31,12 @@ static union {
struct vdso_data *vdso_data = vdso_data_store.data;
enum vvar_pages {
VVAR_DATA_PAGE_OFFSET,
VVAR_TIMENS_PAGE_OFFSET,
VVAR_NR_PAGES,
};
unsigned int __read_mostly vdso_enabled = 1;
static int __init vdso_setup(char *str)
@ -40,12 +49,89 @@ static int __init vdso_setup(char *str)
}
__setup("vdso=", vdso_setup);
#ifdef CONFIG_TIME_NS
struct vdso_data *arch_get_vdso_data(void *vvar_page)
{
return (struct vdso_data *)(vvar_page);
}
static struct page *find_timens_vvar_page(struct vm_area_struct *vma)
{
if (likely(vma->vm_mm == current->mm))
return current->nsproxy->time_ns->vvar_page;
/*
* VM_PFNMAP | VM_IO protect .fault() handler from being called
* through interfaces like /proc/$pid/mem or
* process_vm_{readv,writev}() as long as there's no .access()
* in special_mapping_vmops().
* For more details check_vma_flags() and __access_remote_vm()
*/
WARN(1, "vvar_page accessed remotely");
return NULL;
}
/*
* The VVAR page layout depends on whether a task belongs to the root or
* non-root time namespace. Whenever a task changes its namespace, the VVAR
* page tables are cleared and then they will be re-faulted with a
* corresponding layout.
* See also the comment near timens_setup_vdso_data() for details.
*/
int vdso_join_timens(struct task_struct *task, struct time_namespace *ns)
{
struct mm_struct *mm = task->mm;
struct vm_area_struct *vma;
mmap_read_lock(mm);
for (vma = mm->mmap; vma; vma = vma->vm_next) {
unsigned long size = vma->vm_end - vma->vm_start;
if (!vma_is_special_mapping(vma, &vvar_mapping))
continue;
zap_page_range(vma, vma->vm_start, size);
break;
}
mmap_read_unlock(mm);
return 0;
}
#else
static inline struct page *find_timens_vvar_page(struct vm_area_struct *vma)
{
return NULL;
}
#endif
static vm_fault_t vvar_fault(const struct vm_special_mapping *sm,
struct vm_area_struct *vma, struct vm_fault *vmf)
{
if (vmf->pgoff == 0)
return vmf_insert_pfn(vma, vmf->address, virt_to_pfn(vdso_data));
return VM_FAULT_SIGBUS;
struct page *timens_page = find_timens_vvar_page(vma);
unsigned long pfn;
switch (vmf->pgoff) {
case VVAR_DATA_PAGE_OFFSET:
if (timens_page)
pfn = page_to_pfn(timens_page);
else
pfn = virt_to_pfn(vdso_data);
break;
#ifdef CONFIG_TIME_NS
case VVAR_TIMENS_PAGE_OFFSET:
/*
* If a task belongs to a time namespace then a namespace
* specific VVAR is mapped with the VVAR_DATA_PAGE_OFFSET and
* the real VVAR page is mapped with the VVAR_TIMENS_PAGE_OFFSET
* offset.
* See also the comment near timens_setup_vdso_data().
*/
if (!timens_page)
return VM_FAULT_SIGBUS;
pfn = virt_to_pfn(vdso_data);
break;
#endif /* CONFIG_TIME_NS */
default:
return VM_FAULT_SIGBUS;
}
return vmf_insert_pfn(vma, vmf->address, pfn);
}
static int vdso_mremap(const struct vm_special_mapping *sm,
@ -80,23 +166,25 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
struct vm_area_struct *vma;
int rc;
BUILD_BUG_ON(VVAR_NR_PAGES != __VVAR_PAGES);
if (!vdso_enabled || is_compat_task())
return 0;
if (mmap_write_lock_killable(mm))
return -EINTR;
vdso_text_len = vdso_pages << PAGE_SHIFT;
vdso_mapping_len = vdso_text_len + PAGE_SIZE;
vdso_mapping_len = vdso_text_len + VVAR_NR_PAGES * PAGE_SIZE;
vvar_start = get_unmapped_area(NULL, 0, vdso_mapping_len, 0, 0);
rc = vvar_start;
if (IS_ERR_VALUE(vvar_start))
goto out;
vma = _install_special_mapping(mm, vvar_start, PAGE_SIZE,
VM_READ|VM_MAYREAD|VM_PFNMAP,
vma = _install_special_mapping(mm, vvar_start, VVAR_NR_PAGES*PAGE_SIZE,
VM_READ|VM_MAYREAD|VM_IO|VM_DONTDUMP|
VM_PFNMAP,
&vvar_mapping);
rc = PTR_ERR(vma);
if (IS_ERR(vma))
goto out;
vdso_text_start = vvar_start + PAGE_SIZE;
vdso_text_start = vvar_start + VVAR_NR_PAGES * PAGE_SIZE;
/* VM_MAYWRITE for COW so gdb can set breakpoints */
vma = _install_special_mapping(mm, vdso_text_start, vdso_text_len,
VM_READ|VM_EXEC|

View File

@ -13,7 +13,10 @@ ENTRY(_start)
SECTIONS
{
PROVIDE(_vdso_data = . - PAGE_SIZE);
PROVIDE(_vdso_data = . - __VVAR_PAGES * PAGE_SIZE);
#ifdef CONFIG_TIME_NS
PROVIDE(_timens_data = _vdso_data + PAGE_SIZE);
#endif
. = VDSO64_LBASE + SIZEOF_HEADERS;
.hash : { *(.hash) } :text