Merge branch 'xen-upstream' of ssh://master.kernel.org/pub/scm/linux/kernel/git/jeremy/xen

* 'xen-upstream' of ssh://master.kernel.org/pub/scm/linux/kernel/git/jeremy/xen: (44 commits)
  xen: disable all non-virtual drivers
  xen: use iret directly when possible
  xen: suppress abs symbol warnings for unused reloc pointers
  xen: Attempt to patch inline versions of common operations
  xen: Place vcpu_info structure into per-cpu memory
  xen: handle external requests for shutdown, reboot and sysrq
  xen: machine operations
  xen: add virtual network device driver
  xen: add virtual block device driver.
  xen: add the Xenbus sysfs and virtual device hotplug driver
  xen: Add grant table support
  xen: use the hvc console infrastructure for Xen console
  xen: hack to prevent bad segment register reload
  xen: lazy-mmu operations
  xen: Add support for preemption
  xen: SMP guest support
  xen: Implement sched_clock
  xen: Account for stolen time
  xen: ignore RW mapping of RO pages in pagetable_init
  xen: Complete pagetable pinning
  ...

commit 5cc97bf2d8
@@ -222,6 +222,8 @@ config PARAVIRT
	  However, when run without a hypervisor the kernel is
	  theoretically slower.  If in doubt, say N.

source "arch/i386/xen/Kconfig"

config VMI
	bool "VMI Paravirt-ops support"
	depends on PARAVIRT
@@ -93,6 +93,9 @@ mflags-$(CONFIG_X86_ES7000) := -Iinclude/asm-i386/mach-es7000
mcore-$(CONFIG_X86_ES7000)	:= mach-default
core-$(CONFIG_X86_ES7000)	:= arch/i386/mach-es7000/

# Xen paravirtualization support
core-$(CONFIG_XEN)		+= arch/i386/xen/

# default subarch .h files
mflags-y += -Iinclude/asm-i386/mach-default
@@ -31,6 +31,8 @@ static const char* safe_abs_relocs[] = {
	"__kernel_rt_sigreturn",
	"__kernel_sigreturn",
	"SYSENTER_RETURN",
	"xen_irq_disable_direct_reloc",
	"xen_save_fl_direct_reloc",
};

static int is_safe_abs_reloc(const char* sym_name)
@@ -17,6 +17,8 @@
#include <asm/thread_info.h>
#include <asm/elf.h>

#include <xen/interface/xen.h>

#define DEFINE(sym, val) \
	asm volatile("\n->" #sym " %0 " #val : : "i" (val))

@@ -59,6 +61,7 @@ void foo(void)
	OFFSET(TI_addr_limit, thread_info, addr_limit);
	OFFSET(TI_restart_block, thread_info, restart_block);
	OFFSET(TI_sysenter_return, thread_info, sysenter_return);
	OFFSET(TI_cpu, thread_info, cpu);
	BLANK();

	OFFSET(GDS_size, Xgt_desc_struct, size);

@@ -115,4 +118,10 @@ void foo(void)
	OFFSET(PARAVIRT_iret, paravirt_ops, iret);
	OFFSET(PARAVIRT_read_cr0, paravirt_ops, read_cr0);
#endif

#ifdef CONFIG_XEN
	BLANK();
	OFFSET(XEN_vcpu_info_mask, vcpu_info, evtchn_upcall_mask);
	OFFSET(XEN_vcpu_info_pending, vcpu_info, evtchn_upcall_pending);
#endif
}
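The DEFINE()/OFFSET() trick above works because the asm("\n->...") statements make the compiler emit marker lines into the generated assembly, which the build system then turns into a generated offsets header that assembly code (such as the new Xen entry stubs) can include. As a rough illustration only, the two new Xen entries might end up looking like this in the generated header; the numeric values are assumptions based on evtchn_upcall_pending being the first byte of struct vcpu_info and evtchn_upcall_mask the second, not something stated in this diff:

    /* hypothetical excerpt of the generated asm-offsets header */
    #define XEN_vcpu_info_mask 1	/* offsetof(struct vcpu_info, evtchn_upcall_mask) */
    #define XEN_vcpu_info_pending 0	/* offsetof(struct vcpu_info, evtchn_upcall_pending) */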
@@ -1023,6 +1023,91 @@ ENTRY(kernel_thread_helper)
	CFI_ENDPROC
ENDPROC(kernel_thread_helper)

#ifdef CONFIG_XEN
ENTRY(xen_hypervisor_callback)
	CFI_STARTPROC
	pushl $0
	CFI_ADJUST_CFA_OFFSET 4
	SAVE_ALL
	TRACE_IRQS_OFF

	/* Check to see if we got the event in the critical
	   region in xen_iret_direct, after we've reenabled
	   events and checked for pending events.  This simulates
	   iret instruction's behaviour where it delivers a
	   pending interrupt when enabling interrupts. */
	movl PT_EIP(%esp),%eax
	cmpl $xen_iret_start_crit,%eax
	jb   1f
	cmpl $xen_iret_end_crit,%eax
	jae  1f

	call xen_iret_crit_fixup

1:	mov %esp, %eax
	call xen_evtchn_do_upcall
	jmp  ret_from_intr
	CFI_ENDPROC
ENDPROC(xen_hypervisor_callback)

# Hypervisor uses this for application faults while it executes.
# We get here for two reasons:
#  1. Fault while reloading DS, ES, FS or GS
#  2. Fault while executing IRET
# Category 1 we fix up by reattempting the load, and zeroing the segment
# register if the load fails.
# Category 2 we fix up by jumping to do_iret_error. We cannot use the
# normal Linux return path in this case because if we use the IRET hypercall
# to pop the stack frame we end up in an infinite loop of failsafe callbacks.
# We distinguish between categories by maintaining a status value in EAX.
ENTRY(xen_failsafe_callback)
	CFI_STARTPROC
	pushl %eax
	CFI_ADJUST_CFA_OFFSET 4
	movl $1,%eax
1:	mov 4(%esp),%ds
2:	mov 8(%esp),%es
3:	mov 12(%esp),%fs
4:	mov 16(%esp),%gs
	testl %eax,%eax
	popl %eax
	CFI_ADJUST_CFA_OFFSET -4
	lea 16(%esp),%esp
	CFI_ADJUST_CFA_OFFSET -16
	jz 5f
	addl $16,%esp
	jmp iret_exc		# EAX != 0 => Category 2 (Bad IRET)
5:	pushl $0		# EAX == 0 => Category 1 (Bad segment)
	CFI_ADJUST_CFA_OFFSET 4
	SAVE_ALL
	jmp ret_from_exception
	CFI_ENDPROC

.section .fixup,"ax"
6:	xorl %eax,%eax
	movl %eax,4(%esp)
	jmp 1b
7:	xorl %eax,%eax
	movl %eax,8(%esp)
	jmp 2b
8:	xorl %eax,%eax
	movl %eax,12(%esp)
	jmp 3b
9:	xorl %eax,%eax
	movl %eax,16(%esp)
	jmp 4b
.previous
.section __ex_table,"a"
	.align 4
	.long 1b,6b
	.long 2b,7b
	.long 3b,8b
	.long 4b,9b
.previous
ENDPROC(xen_failsafe_callback)

#endif	/* CONFIG_XEN */

.section .rodata,"a"
#include "syscall_table.S"
@@ -510,7 +510,8 @@ ENTRY(_stext)
/*
 * BSS section
 */
.section ".bss.page_aligned","w"
.section ".bss.page_aligned","wa"
	.align PAGE_SIZE_asm
ENTRY(swapper_pg_dir)
	.fill 1024,4,0
ENTRY(swapper_pg_pmd)

@@ -538,6 +539,8 @@ fault_msg:
	.ascii "Int %d: CR2 %p err %p EIP %p CS %p flags %p\n"
	.asciz "Stack: %p %p %p %p %p %p %p %p\n"

#include "../xen/xen-head.S"

/*
 * The IDT and GDT 'descriptors' are a strange 48-bit object
 * only used by the lidt and lgdt instructions. They are not
@@ -228,6 +228,41 @@ static int __init print_banner(void)
}
core_initcall(print_banner);

static struct resource reserve_ioports = {
	.start = 0,
	.end = IO_SPACE_LIMIT,
	.name = "paravirt-ioport",
	.flags = IORESOURCE_IO | IORESOURCE_BUSY,
};

static struct resource reserve_iomem = {
	.start = 0,
	.end = -1,
	.name = "paravirt-iomem",
	.flags = IORESOURCE_MEM | IORESOURCE_BUSY,
};

/*
 * Reserve the whole legacy IO space to prevent any legacy drivers
 * from wasting time probing for their hardware.  This is a fairly
 * brute-force approach to disabling all non-virtual drivers.
 *
 * Note that this must be called very early to have any effect.
 */
int paravirt_disable_iospace(void)
{
	int ret;

	ret = request_resource(&ioport_resource, &reserve_ioports);
	if (ret == 0) {
		ret = request_resource(&iomem_resource, &reserve_iomem);
		if (ret)
			release_resource(&reserve_ioports);
	}

	return ret;
}

struct paravirt_ops paravirt_ops = {
	.name = "bare hardware",
	.paravirt_enabled = 0,

@@ -267,7 +302,7 @@ struct paravirt_ops paravirt_ops = {
	.write_msr = native_write_msr_safe,
	.read_tsc = native_read_tsc,
	.read_pmc = native_read_pmc,
	.get_scheduled_cycles = native_read_tsc,
	.sched_clock = native_sched_clock,
	.get_cpu_khz = native_calculate_cpu_khz,
	.load_tr_desc = native_load_tr_desc,
	.set_ldt = native_set_ldt,
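paravirt_disable_iospace() disables the non-virtual drivers indirectly: by registering the whole port and memory ranges as busy resources, every later request_region()/request_mem_region() from a legacy driver conflicts and fails, so the driver never probes its (non-existent) hardware. A minimal sketch of the effect; the driver, port range and name below are invented for illustration:

    /* Hypothetical legacy driver probe, after paravirt_disable_iospace()
     * has claimed ports 0..IO_SPACE_LIMIT as "paravirt-ioport". */
    static int __init legacy_uart_probe(void)
    {
    	if (!request_region(0x3f8, 8, "hypothetical-uart"))
    		return -EBUSY;	/* always taken: the whole I/O space is busy */
    	/* ...never reached in a paravirtualized guest... */
    	return 0;
    }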
@ -601,6 +601,8 @@ void __init setup_arch(char **cmdline_p)
|
|||
* NOTE: at this point the bootmem allocator is fully available.
|
||||
*/
|
||||
|
||||
paravirt_post_allocator_init();
|
||||
|
||||
dmi_scan_machine();
|
||||
|
||||
#ifdef CONFIG_X86_GENERICARCH
|
||||
|
|
|
@ -22,6 +22,7 @@
|
|||
|
||||
#include <asm/mtrr.h>
|
||||
#include <asm/tlbflush.h>
|
||||
#include <asm/mmu_context.h>
|
||||
#include <mach_apic.h>
|
||||
|
||||
/*
|
||||
|
@ -249,13 +250,13 @@ static unsigned long flush_va;
|
|||
static DEFINE_SPINLOCK(tlbstate_lock);
|
||||
|
||||
/*
|
||||
* We cannot call mmdrop() because we are in interrupt context,
|
||||
* We cannot call mmdrop() because we are in interrupt context,
|
||||
* instead update mm->cpu_vm_mask.
|
||||
*
|
||||
* We need to reload %cr3 since the page tables may be going
|
||||
* away from under us..
|
||||
*/
|
||||
static inline void leave_mm (unsigned long cpu)
|
||||
void leave_mm(unsigned long cpu)
|
||||
{
|
||||
if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK)
|
||||
BUG();
|
||||
|
|
|
@ -148,7 +148,7 @@ void __init smp_alloc_memory(void)
|
|||
* a given CPU
|
||||
*/
|
||||
|
||||
static void __cpuinit smp_store_cpu_info(int id)
|
||||
void __cpuinit smp_store_cpu_info(int id)
|
||||
{
|
||||
struct cpuinfo_x86 *c = cpu_data + id;
|
||||
|
||||
|
@ -308,8 +308,7 @@ cpumask_t cpu_coregroup_map(int cpu)
|
|||
/* representing cpus for which sibling maps can be computed */
|
||||
static cpumask_t cpu_sibling_setup_map;
|
||||
|
||||
static inline void
|
||||
set_cpu_sibling_map(int cpu)
|
||||
void set_cpu_sibling_map(int cpu)
|
||||
{
|
||||
int i;
|
||||
struct cpuinfo_x86 *c = cpu_data;
|
||||
|
@ -1144,8 +1143,7 @@ void __init native_smp_prepare_boot_cpu(void)
|
|||
}
|
||||
|
||||
#ifdef CONFIG_HOTPLUG_CPU
|
||||
static void
|
||||
remove_siblinginfo(int cpu)
|
||||
void remove_siblinginfo(int cpu)
|
||||
{
|
||||
int sibling;
|
||||
struct cpuinfo_x86 *c = cpu_data;
|
||||
|
|
|
@@ -84,7 +84,7 @@ static inline int check_tsc_unstable(void)
 *
 * -johnstul@us.ibm.com "math is hard, lets go shopping!"
 */
static unsigned long cyc2ns_scale __read_mostly;
unsigned long cyc2ns_scale __read_mostly;

#define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */

@@ -93,15 +93,10 @@ static inline void set_cyc2ns_scale(unsigned long cpu_khz)
	cyc2ns_scale = (1000000 << CYC2NS_SCALE_FACTOR)/cpu_khz;
}

static inline unsigned long long cycles_2_ns(unsigned long long cyc)
{
	return (cyc * cyc2ns_scale) >> CYC2NS_SCALE_FACTOR;
}

/*
 * Scheduler clock - returns current time in nanosec units.
 */
unsigned long long sched_clock(void)
unsigned long long native_sched_clock(void)
{
	unsigned long long this_offset;

@@ -118,12 +113,24 @@ unsigned long long sched_clock(void)
		return (jiffies_64 - INITIAL_JIFFIES) * (1000000000 / HZ);

	/* read the Time Stamp Counter: */
	get_scheduled_cycles(this_offset);
	rdtscll(this_offset);

	/* return the value in ns */
	return cycles_2_ns(this_offset);
}

/* We need to define a real function for sched_clock, to override the
   weak default version */
#ifdef CONFIG_PARAVIRT
unsigned long long sched_clock(void)
{
	return paravirt_sched_clock();
}
#else
unsigned long long sched_clock(void)
	__attribute__((alias("native_sched_clock")));
#endif

unsigned long native_calculate_cpu_khz(void)
{
	unsigned long long start, end;
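The cycles_2_ns() conversion above is fixed-point arithmetic: the scale is precomputed once from cpu_khz with CYC2NS_SCALE_FACTOR = 10 fractional bits, and each sched_clock() call is then just a multiply and a shift. A quick worked example, assuming a hypothetical 2 GHz TSC:

    /* assuming cpu_khz = 2,000,000 (2 GHz) */
    unsigned long scale = (1000000 << 10) / 2000000;	/* = 512, i.e. 0.5 ns per cycle in .10 fixed point */
    unsigned long long ns = (4000000ULL * scale) >> 10;	/* 4,000,000 cycles -> 2,000,000 ns = 2 ms */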
@ -362,7 +362,7 @@ static void *vmi_kmap_atomic_pte(struct page *page, enum km_type type)
|
|||
}
|
||||
#endif
|
||||
|
||||
static void vmi_allocate_pt(u32 pfn)
|
||||
static void vmi_allocate_pt(struct mm_struct *mm, u32 pfn)
|
||||
{
|
||||
vmi_set_page_type(pfn, VMI_PAGE_L1);
|
||||
vmi_ops.allocate_page(pfn, VMI_PAGE_L1, 0, 0, 0);
|
||||
|
@ -891,7 +891,7 @@ static inline int __init activate_vmi(void)
|
|||
paravirt_ops.setup_boot_clock = vmi_time_bsp_init;
|
||||
paravirt_ops.setup_secondary_clock = vmi_time_ap_init;
|
||||
#endif
|
||||
paravirt_ops.get_scheduled_cycles = vmi_get_sched_cycles;
|
||||
paravirt_ops.sched_clock = vmi_sched_clock;
|
||||
paravirt_ops.get_cpu_khz = vmi_cpu_khz;
|
||||
|
||||
/* We have true wallclock functions; disable CMOS clock sync */
|
||||
|
|
|
@ -64,10 +64,10 @@ int vmi_set_wallclock(unsigned long now)
|
|||
return 0;
|
||||
}
|
||||
|
||||
/* paravirt_ops.get_scheduled_cycles = vmi_get_sched_cycles */
|
||||
unsigned long long vmi_get_sched_cycles(void)
|
||||
/* paravirt_ops.sched_clock = vmi_sched_clock */
|
||||
unsigned long long vmi_sched_clock(void)
|
||||
{
|
||||
return vmi_timer_ops.get_cycle_counter(VMI_CYCLES_AVAILABLE);
|
||||
return cycles_2_ns(vmi_timer_ops.get_cycle_counter(VMI_CYCLES_AVAILABLE));
|
||||
}
|
||||
|
||||
/* paravirt_ops.get_cpu_khz = vmi_cpu_khz */
|
||||
|
|
|
@ -88,6 +88,7 @@ SECTIONS
|
|||
|
||||
. = ALIGN(4096);
|
||||
.data.page_aligned : AT(ADDR(.data.page_aligned) - LOAD_OFFSET) {
|
||||
*(.data.page_aligned)
|
||||
*(.data.idt)
|
||||
}
|
||||
|
||||
|
|
|
@ -3,23 +3,40 @@
|
|||
* Here we can supply some information useful to userland.
|
||||
*/
|
||||
|
||||
#include <linux/uts.h>
|
||||
#include <linux/version.h>
|
||||
#include <linux/elfnote.h>
|
||||
|
||||
#define ASM_ELF_NOTE_BEGIN(name, flags, vendor, type) \
|
||||
.section name, flags; \
|
||||
.balign 4; \
|
||||
.long 1f - 0f; /* name length */ \
|
||||
.long 3f - 2f; /* data length */ \
|
||||
.long type; /* note type */ \
|
||||
0: .asciz vendor; /* vendor name */ \
|
||||
1: .balign 4; \
|
||||
2:
|
||||
|
||||
#define ASM_ELF_NOTE_END \
|
||||
3: .balign 4; /* pad out section */ \
|
||||
.previous
|
||||
|
||||
ASM_ELF_NOTE_BEGIN(".note.kernel-version", "a", UTS_SYSNAME, 0)
|
||||
/* Ideally this would use UTS_NAME, but using a quoted string here
|
||||
doesn't work. Remember to change this when changing the
|
||||
kernel's name. */
|
||||
ELFNOTE_START(Linux, 0, "a")
|
||||
.long LINUX_VERSION_CODE
|
||||
ASM_ELF_NOTE_END
|
||||
ELFNOTE_END
|
||||
|
||||
#ifdef CONFIG_XEN
|
||||
|
||||
/*
|
||||
* Add a special note telling glibc's dynamic linker a fake hardware
|
||||
* flavor that it will use to choose the search path for libraries in the
|
||||
* same way it uses real hardware capabilities like "mmx".
|
||||
* We supply "nosegneg" as the fake capability, to indicate that we
|
||||
* do not like negative offsets in instructions using segment overrides,
|
||||
* since we implement those inefficiently. This makes it possible to
|
||||
* install libraries optimized to avoid those access patterns in someplace
|
||||
* like /lib/i686/tls/nosegneg. Note that an /etc/ld.so.conf.d/file
|
||||
* corresponding to the bits here is needed to make ldconfig work right.
|
||||
* It should contain:
|
||||
* hwcap 1 nosegneg
|
||||
* to match the mapping of bit to name that we give here.
|
||||
*/
|
||||
|
||||
/* Bit used for the pseudo-hwcap for non-negative segments. We use
|
||||
bit 1 to avoid bugs in some versions of glibc when bit 0 is
|
||||
used; the choice is otherwise arbitrary. */
|
||||
#define VDSO_NOTE_NONEGSEG_BIT 1
|
||||
|
||||
ELFNOTE_START(GNU, 2, "a")
|
||||
.long 1, 1<<VDSO_NOTE_NONEGSEG_BIT /* ncaps, mask */
|
||||
.byte VDSO_NOTE_NONEGSEG_BIT; .asciz "nosegneg" /* bit, name */
|
||||
ELFNOTE_END
|
||||
#endif
|
||||
|
|
|
@ -52,7 +52,7 @@ execute(const char *string)
|
|||
NULL,
|
||||
};
|
||||
|
||||
if ((ret = call_usermodehelper(argv[0], argv, envp, 1)) != 0) {
|
||||
if ((ret = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC)) != 0) {
|
||||
printk(KERN_ERR "Voyager failed to run \"%s\": %i\n",
|
||||
string, ret);
|
||||
}
|
||||
|
|
|
@ -87,7 +87,7 @@ static pte_t * __init one_page_table_init(pmd_t *pmd)
|
|||
if (!(pmd_val(*pmd) & _PAGE_PRESENT)) {
|
||||
pte_t *page_table = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE);
|
||||
|
||||
paravirt_alloc_pt(__pa(page_table) >> PAGE_SHIFT);
|
||||
paravirt_alloc_pt(&init_mm, __pa(page_table) >> PAGE_SHIFT);
|
||||
set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE));
|
||||
BUG_ON(page_table != pte_offset_kernel(pmd, 0));
|
||||
}
|
||||
|
@ -473,6 +473,7 @@ void zap_low_mappings (void)
|
|||
|
||||
static int disable_nx __initdata = 0;
|
||||
u64 __supported_pte_mask __read_mostly = ~_PAGE_NX;
|
||||
EXPORT_SYMBOL_GPL(__supported_pte_mask);
|
||||
|
||||
/*
|
||||
* noexec = on|off
|
||||
|
|
|
@ -60,7 +60,7 @@ static struct page *split_large_page(unsigned long address, pgprot_t prot,
|
|||
address = __pa(address);
|
||||
addr = address & LARGE_PAGE_MASK;
|
||||
pbase = (pte_t *)page_address(base);
|
||||
paravirt_alloc_pt(page_to_pfn(base));
|
||||
paravirt_alloc_pt(&init_mm, page_to_pfn(base));
|
||||
for (i = 0; i < PTRS_PER_PTE; i++, addr += PAGE_SIZE) {
|
||||
set_pte(&pbase[i], pfn_pte(addr >> PAGE_SHIFT,
|
||||
addr == address ? prot : ref_prot));
|
||||
|
|
|
@@ -0,0 +1,11 @@
#
# This Kconfig describes xen options
#

config XEN
	bool "Enable support for Xen hypervisor"
	depends on PARAVIRT && X86_CMPXCHG && X86_TSC && !NEED_MULTIPLE_NODES
	help
	  This is the Linux Xen port.  Enabling this will allow the
	  kernel to boot in a paravirtualized environment under the
	  Xen hypervisor.
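In practice, booting as a Xen guest means enabling both the paravirt layer and this new option; a minimal .config fragment might look like the following (the other dependencies, X86_CMPXCHG and X86_TSC, normally come from the processor selection):

    CONFIG_PARAVIRT=y
    CONFIG_XEN=y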
@@ -0,0 +1,4 @@
obj-y		:= enlighten.o setup.o features.o multicalls.o mmu.o \
			events.o time.o manage.o xen-asm.o

obj-$(CONFIG_SMP)	+= smp.o

File diff suppressed because it is too large
@@ -0,0 +1,590 @@
/*
 * Xen event channels
 *
 * Xen models interrupts with abstract event channels.  Because each
 * domain gets 1024 event channels, but NR_IRQ is not that large, we
 * must dynamically map irqs<->event channels.  The event channels
 * interface with the rest of the kernel by defining a xen interrupt
 * chip.  When an event is received, it is mapped to an irq and sent
 * through the normal interrupt processing path.
 *
 * There are four kinds of events which can be mapped to an event
 * channel:
 *
 * 1. Inter-domain notifications.  This includes all the virtual
 *    device events, since they're driven by front-ends in another domain
 *    (typically dom0).
 * 2. VIRQs, typically used for timers.  These are per-cpu events.
 * 3. IPIs.
 * 4. Hardware interrupts. Not supported at present.
 *
 * Jeremy Fitzhardinge <jeremy@xensource.com>, XenSource Inc, 2007
 */

#include <linux/linkage.h>
|
||||
#include <linux/interrupt.h>
|
||||
#include <linux/irq.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/string.h>
|
||||
|
||||
#include <asm/ptrace.h>
|
||||
#include <asm/irq.h>
|
||||
#include <asm/sync_bitops.h>
|
||||
#include <asm/xen/hypercall.h>
|
||||
|
||||
#include <xen/events.h>
|
||||
#include <xen/interface/xen.h>
|
||||
#include <xen/interface/event_channel.h>
|
||||
|
||||
#include "xen-ops.h"
|
||||
|
||||
/*
|
||||
* This lock protects updates to the following mapping and reference-count
|
||||
* arrays. The lock does not need to be acquired to read the mapping tables.
|
||||
*/
|
||||
static DEFINE_SPINLOCK(irq_mapping_update_lock);
|
||||
|
||||
/* IRQ <-> VIRQ mapping. */
|
||||
static DEFINE_PER_CPU(int, virq_to_irq[NR_VIRQS]) = {[0 ... NR_VIRQS-1] = -1};
|
||||
|
||||
/* IRQ <-> IPI mapping */
|
||||
static DEFINE_PER_CPU(int, ipi_to_irq[XEN_NR_IPIS]) = {[0 ... XEN_NR_IPIS-1] = -1};
|
||||
|
||||
/* Packed IRQ information: binding type, sub-type index, and event channel. */
|
||||
struct packed_irq
|
||||
{
|
||||
unsigned short evtchn;
|
||||
unsigned char index;
|
||||
unsigned char type;
|
||||
};
|
||||
|
||||
static struct packed_irq irq_info[NR_IRQS];
|
||||
|
||||
/* Binding types. */
|
||||
enum {
|
||||
IRQT_UNBOUND,
|
||||
IRQT_PIRQ,
|
||||
IRQT_VIRQ,
|
||||
IRQT_IPI,
|
||||
IRQT_EVTCHN
|
||||
};
|
||||
|
||||
/* Convenient shorthand for packed representation of an unbound IRQ. */
|
||||
#define IRQ_UNBOUND mk_irq_info(IRQT_UNBOUND, 0, 0)
|
||||
|
||||
static int evtchn_to_irq[NR_EVENT_CHANNELS] = {
|
||||
[0 ... NR_EVENT_CHANNELS-1] = -1
|
||||
};
|
||||
static unsigned long cpu_evtchn_mask[NR_CPUS][NR_EVENT_CHANNELS/BITS_PER_LONG];
|
||||
static u8 cpu_evtchn[NR_EVENT_CHANNELS];
|
||||
|
||||
/* Reference counts for bindings to IRQs. */
|
||||
static int irq_bindcount[NR_IRQS];
|
||||
|
||||
/* Xen will never allocate port zero for any purpose. */
|
||||
#define VALID_EVTCHN(chn) ((chn) != 0)
|
||||
|
||||
/*
|
||||
* Force a proper event-channel callback from Xen after clearing the
|
||||
* callback mask. We do this in a very simple manner, by making a call
|
||||
* down into Xen. The pending flag will be checked by Xen on return.
|
||||
*/
|
||||
void force_evtchn_callback(void)
|
||||
{
|
||||
(void)HYPERVISOR_xen_version(0, NULL);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(force_evtchn_callback);
|
||||
|
||||
static struct irq_chip xen_dynamic_chip;
|
||||
|
||||
/* Constructor for packed IRQ information. */
|
||||
static inline struct packed_irq mk_irq_info(u32 type, u32 index, u32 evtchn)
|
||||
{
|
||||
return (struct packed_irq) { evtchn, index, type };
|
||||
}
|
||||
|
||||
/*
|
||||
* Accessors for packed IRQ information.
|
||||
*/
|
||||
static inline unsigned int evtchn_from_irq(int irq)
|
||||
{
|
||||
return irq_info[irq].evtchn;
|
||||
}
|
||||
|
||||
static inline unsigned int index_from_irq(int irq)
|
||||
{
|
||||
return irq_info[irq].index;
|
||||
}
|
||||
|
||||
static inline unsigned int type_from_irq(int irq)
|
||||
{
|
||||
return irq_info[irq].type;
|
||||
}
|
||||
|
||||
static inline unsigned long active_evtchns(unsigned int cpu,
|
||||
struct shared_info *sh,
|
||||
unsigned int idx)
|
||||
{
|
||||
return (sh->evtchn_pending[idx] &
|
||||
cpu_evtchn_mask[cpu][idx] &
|
||||
~sh->evtchn_mask[idx]);
|
||||
}
|
||||
|
||||
static void bind_evtchn_to_cpu(unsigned int chn, unsigned int cpu)
|
||||
{
|
||||
int irq = evtchn_to_irq[chn];
|
||||
|
||||
BUG_ON(irq == -1);
|
||||
#ifdef CONFIG_SMP
|
||||
irq_desc[irq].affinity = cpumask_of_cpu(cpu);
|
||||
#endif
|
||||
|
||||
__clear_bit(chn, cpu_evtchn_mask[cpu_evtchn[chn]]);
|
||||
__set_bit(chn, cpu_evtchn_mask[cpu]);
|
||||
|
||||
cpu_evtchn[chn] = cpu;
|
||||
}
|
||||
|
||||
static void init_evtchn_cpu_bindings(void)
|
||||
{
|
||||
#ifdef CONFIG_SMP
|
||||
int i;
|
||||
/* By default all event channels notify CPU#0. */
|
||||
for (i = 0; i < NR_IRQS; i++)
|
||||
irq_desc[i].affinity = cpumask_of_cpu(0);
|
||||
#endif
|
||||
|
||||
memset(cpu_evtchn, 0, sizeof(cpu_evtchn));
|
||||
memset(cpu_evtchn_mask[0], ~0, sizeof(cpu_evtchn_mask[0]));
|
||||
}
|
||||
|
||||
static inline unsigned int cpu_from_evtchn(unsigned int evtchn)
|
||||
{
|
||||
return cpu_evtchn[evtchn];
|
||||
}
|
||||
|
||||
static inline void clear_evtchn(int port)
|
||||
{
|
||||
struct shared_info *s = HYPERVISOR_shared_info;
|
||||
sync_clear_bit(port, &s->evtchn_pending[0]);
|
||||
}
|
||||
|
||||
static inline void set_evtchn(int port)
|
||||
{
|
||||
struct shared_info *s = HYPERVISOR_shared_info;
|
||||
sync_set_bit(port, &s->evtchn_pending[0]);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* notify_remote_via_irq - send event to remote end of event channel via irq
|
||||
* @irq: irq of event channel to send event to
|
||||
*
|
||||
* Unlike notify_remote_via_evtchn(), this is safe to use across
|
||||
* save/restore. Notifications on a broken connection are silently
|
||||
* dropped.
|
||||
*/
|
||||
void notify_remote_via_irq(int irq)
|
||||
{
|
||||
int evtchn = evtchn_from_irq(irq);
|
||||
|
||||
if (VALID_EVTCHN(evtchn))
|
||||
notify_remote_via_evtchn(evtchn);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(notify_remote_via_irq);
|
||||
|
||||
static void mask_evtchn(int port)
|
||||
{
|
||||
struct shared_info *s = HYPERVISOR_shared_info;
|
||||
sync_set_bit(port, &s->evtchn_mask[0]);
|
||||
}
|
||||
|
||||
static void unmask_evtchn(int port)
|
||||
{
|
||||
struct shared_info *s = HYPERVISOR_shared_info;
|
||||
unsigned int cpu = get_cpu();
|
||||
|
||||
BUG_ON(!irqs_disabled());
|
||||
|
||||
/* Slow path (hypercall) if this is a non-local port. */
|
||||
if (unlikely(cpu != cpu_from_evtchn(port))) {
|
||||
struct evtchn_unmask unmask = { .port = port };
|
||||
(void)HYPERVISOR_event_channel_op(EVTCHNOP_unmask, &unmask);
|
||||
} else {
|
||||
struct vcpu_info *vcpu_info = __get_cpu_var(xen_vcpu);
|
||||
|
||||
sync_clear_bit(port, &s->evtchn_mask[0]);
|
||||
|
||||
/*
|
||||
* The following is basically the equivalent of
|
||||
* 'hw_resend_irq'. Just like a real IO-APIC we 'lose
|
||||
* the interrupt edge' if the channel is masked.
|
||||
*/
|
||||
if (sync_test_bit(port, &s->evtchn_pending[0]) &&
|
||||
!sync_test_and_set_bit(port / BITS_PER_LONG,
|
||||
&vcpu_info->evtchn_pending_sel))
|
||||
vcpu_info->evtchn_upcall_pending = 1;
|
||||
}
|
||||
|
||||
put_cpu();
|
||||
}
|
||||
|
||||
static int find_unbound_irq(void)
|
||||
{
|
||||
int irq;
|
||||
|
||||
/* Only allocate from dynirq range */
|
||||
for (irq = 0; irq < NR_IRQS; irq++)
|
||||
if (irq_bindcount[irq] == 0)
|
||||
break;
|
||||
|
||||
if (irq == NR_IRQS)
|
||||
panic("No available IRQ to bind to: increase NR_IRQS!\n");
|
||||
|
||||
return irq;
|
||||
}
|
||||
|
||||
int bind_evtchn_to_irq(unsigned int evtchn)
|
||||
{
|
||||
int irq;
|
||||
|
||||
spin_lock(&irq_mapping_update_lock);
|
||||
|
||||
irq = evtchn_to_irq[evtchn];
|
||||
|
||||
if (irq == -1) {
|
||||
irq = find_unbound_irq();
|
||||
|
||||
dynamic_irq_init(irq);
|
||||
set_irq_chip_and_handler_name(irq, &xen_dynamic_chip,
|
||||
handle_level_irq, "event");
|
||||
|
||||
evtchn_to_irq[evtchn] = irq;
|
||||
irq_info[irq] = mk_irq_info(IRQT_EVTCHN, 0, evtchn);
|
||||
}
|
||||
|
||||
irq_bindcount[irq]++;
|
||||
|
||||
spin_unlock(&irq_mapping_update_lock);
|
||||
|
||||
return irq;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(bind_evtchn_to_irq);
|
||||
|
||||
static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu)
|
||||
{
|
||||
struct evtchn_bind_ipi bind_ipi;
|
||||
int evtchn, irq;
|
||||
|
||||
spin_lock(&irq_mapping_update_lock);
|
||||
|
||||
irq = per_cpu(ipi_to_irq, cpu)[ipi];
|
||||
if (irq == -1) {
|
||||
irq = find_unbound_irq();
|
||||
if (irq < 0)
|
||||
goto out;
|
||||
|
||||
dynamic_irq_init(irq);
|
||||
set_irq_chip_and_handler_name(irq, &xen_dynamic_chip,
|
||||
handle_level_irq, "ipi");
|
||||
|
||||
bind_ipi.vcpu = cpu;
|
||||
if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_ipi,
|
||||
&bind_ipi) != 0)
|
||||
BUG();
|
||||
evtchn = bind_ipi.port;
|
||||
|
||||
evtchn_to_irq[evtchn] = irq;
|
||||
irq_info[irq] = mk_irq_info(IRQT_IPI, ipi, evtchn);
|
||||
|
||||
per_cpu(ipi_to_irq, cpu)[ipi] = irq;
|
||||
|
||||
bind_evtchn_to_cpu(evtchn, cpu);
|
||||
}
|
||||
|
||||
irq_bindcount[irq]++;
|
||||
|
||||
out:
|
||||
spin_unlock(&irq_mapping_update_lock);
|
||||
return irq;
|
||||
}
|
||||
|
||||
|
||||
static int bind_virq_to_irq(unsigned int virq, unsigned int cpu)
|
||||
{
|
||||
struct evtchn_bind_virq bind_virq;
|
||||
int evtchn, irq;
|
||||
|
||||
spin_lock(&irq_mapping_update_lock);
|
||||
|
||||
irq = per_cpu(virq_to_irq, cpu)[virq];
|
||||
|
||||
if (irq == -1) {
|
||||
bind_virq.virq = virq;
|
||||
bind_virq.vcpu = cpu;
|
||||
if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq,
|
||||
&bind_virq) != 0)
|
||||
BUG();
|
||||
evtchn = bind_virq.port;
|
||||
|
||||
irq = find_unbound_irq();
|
||||
|
||||
dynamic_irq_init(irq);
|
||||
set_irq_chip_and_handler_name(irq, &xen_dynamic_chip,
|
||||
handle_level_irq, "virq");
|
||||
|
||||
evtchn_to_irq[evtchn] = irq;
|
||||
irq_info[irq] = mk_irq_info(IRQT_VIRQ, virq, evtchn);
|
||||
|
||||
per_cpu(virq_to_irq, cpu)[virq] = irq;
|
||||
|
||||
bind_evtchn_to_cpu(evtchn, cpu);
|
||||
}
|
||||
|
||||
irq_bindcount[irq]++;
|
||||
|
||||
spin_unlock(&irq_mapping_update_lock);
|
||||
|
||||
return irq;
|
||||
}
|
||||
|
||||
static void unbind_from_irq(unsigned int irq)
|
||||
{
|
||||
struct evtchn_close close;
|
||||
int evtchn = evtchn_from_irq(irq);
|
||||
|
||||
spin_lock(&irq_mapping_update_lock);
|
||||
|
||||
if (VALID_EVTCHN(evtchn) && (--irq_bindcount[irq] == 0)) {
|
||||
close.port = evtchn;
|
||||
if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close) != 0)
|
||||
BUG();
|
||||
|
||||
switch (type_from_irq(irq)) {
|
||||
case IRQT_VIRQ:
|
||||
per_cpu(virq_to_irq, cpu_from_evtchn(evtchn))
|
||||
[index_from_irq(irq)] = -1;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
/* Closed ports are implicitly re-bound to VCPU0. */
|
||||
bind_evtchn_to_cpu(evtchn, 0);
|
||||
|
||||
evtchn_to_irq[evtchn] = -1;
|
||||
irq_info[irq] = IRQ_UNBOUND;
|
||||
|
||||
dynamic_irq_init(irq);
|
||||
}
|
||||
|
||||
spin_unlock(&irq_mapping_update_lock);
|
||||
}
|
||||
|
||||
int bind_evtchn_to_irqhandler(unsigned int evtchn,
|
||||
irqreturn_t (*handler)(int, void *),
|
||||
unsigned long irqflags,
|
||||
const char *devname, void *dev_id)
|
||||
{
|
||||
unsigned int irq;
|
||||
int retval;
|
||||
|
||||
irq = bind_evtchn_to_irq(evtchn);
|
||||
retval = request_irq(irq, handler, irqflags, devname, dev_id);
|
||||
if (retval != 0) {
|
||||
unbind_from_irq(irq);
|
||||
return retval;
|
||||
}
|
||||
|
||||
return irq;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(bind_evtchn_to_irqhandler);
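bind_evtchn_to_irqhandler() is the interface a virtual-device frontend would typically use: it maps the event channel onto a fresh irq and installs the handler in one step, and unwinds the binding if request_irq() fails. A hedged usage sketch; the handler, device pointer and channel number are hypothetical, not part of this diff:

    /* Hypothetical frontend: bind an inter-domain event channel. */
    static irqreturn_t my_frontend_interrupt(int irq, void *dev_id)
    {
    	/* process responses queued by the backend here */
    	return IRQ_HANDLED;
    }

    static int my_frontend_connect(unsigned int evtchn, void *dev)
    {
    	int irq = bind_evtchn_to_irqhandler(evtchn, my_frontend_interrupt,
    					    0, "my-frontend", dev);
    	if (irq < 0)
    		return irq;	/* request_irq() failed; channel already unbound */
    	return 0;
    }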
|
||||
|
||||
int bind_virq_to_irqhandler(unsigned int virq, unsigned int cpu,
|
||||
irqreturn_t (*handler)(int, void *),
|
||||
unsigned long irqflags, const char *devname, void *dev_id)
|
||||
{
|
||||
unsigned int irq;
|
||||
int retval;
|
||||
|
||||
irq = bind_virq_to_irq(virq, cpu);
|
||||
retval = request_irq(irq, handler, irqflags, devname, dev_id);
|
||||
if (retval != 0) {
|
||||
unbind_from_irq(irq);
|
||||
return retval;
|
||||
}
|
||||
|
||||
return irq;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(bind_virq_to_irqhandler);
|
||||
|
||||
int bind_ipi_to_irqhandler(enum ipi_vector ipi,
|
||||
unsigned int cpu,
|
||||
irq_handler_t handler,
|
||||
unsigned long irqflags,
|
||||
const char *devname,
|
||||
void *dev_id)
|
||||
{
|
||||
int irq, retval;
|
||||
|
||||
irq = bind_ipi_to_irq(ipi, cpu);
|
||||
if (irq < 0)
|
||||
return irq;
|
||||
|
||||
retval = request_irq(irq, handler, irqflags, devname, dev_id);
|
||||
if (retval != 0) {
|
||||
unbind_from_irq(irq);
|
||||
return retval;
|
||||
}
|
||||
|
||||
return irq;
|
||||
}
|
||||
|
||||
void unbind_from_irqhandler(unsigned int irq, void *dev_id)
|
||||
{
|
||||
free_irq(irq, dev_id);
|
||||
unbind_from_irq(irq);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(unbind_from_irqhandler);
|
||||
|
||||
void xen_send_IPI_one(unsigned int cpu, enum ipi_vector vector)
|
||||
{
|
||||
int irq = per_cpu(ipi_to_irq, cpu)[vector];
|
||||
BUG_ON(irq < 0);
|
||||
notify_remote_via_irq(irq);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Search the CPUs pending events bitmasks. For each one found, map
|
||||
* the event number to an irq, and feed it into do_IRQ() for
|
||||
* handling.
|
||||
*
|
||||
* Xen uses a two-level bitmap to speed searching. The first level is
|
||||
* a bitset of words which contain pending event bits. The second
|
||||
* level is a bitset of pending events themselves.
|
||||
*/
|
||||
fastcall void xen_evtchn_do_upcall(struct pt_regs *regs)
|
||||
{
|
||||
int cpu = get_cpu();
|
||||
struct shared_info *s = HYPERVISOR_shared_info;
|
||||
struct vcpu_info *vcpu_info = __get_cpu_var(xen_vcpu);
|
||||
unsigned long pending_words;
|
||||
|
||||
vcpu_info->evtchn_upcall_pending = 0;
|
||||
|
||||
/* NB. No need for a barrier here -- XCHG is a barrier on x86. */
|
||||
pending_words = xchg(&vcpu_info->evtchn_pending_sel, 0);
|
||||
while (pending_words != 0) {
|
||||
unsigned long pending_bits;
|
||||
int word_idx = __ffs(pending_words);
|
||||
pending_words &= ~(1UL << word_idx);
|
||||
|
||||
while ((pending_bits = active_evtchns(cpu, s, word_idx)) != 0) {
|
||||
int bit_idx = __ffs(pending_bits);
|
||||
int port = (word_idx * BITS_PER_LONG) + bit_idx;
|
||||
int irq = evtchn_to_irq[port];
|
||||
|
||||
if (irq != -1) {
|
||||
regs->orig_eax = ~irq;
|
||||
do_IRQ(regs);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
put_cpu();
|
||||
}
|
||||
|
||||
/* Rebind an evtchn so that it gets delivered to a specific cpu */
|
||||
static void rebind_irq_to_cpu(unsigned irq, unsigned tcpu)
|
||||
{
|
||||
struct evtchn_bind_vcpu bind_vcpu;
|
||||
int evtchn = evtchn_from_irq(irq);
|
||||
|
||||
if (!VALID_EVTCHN(evtchn))
|
||||
return;
|
||||
|
||||
/* Send future instances of this interrupt to other vcpu. */
|
||||
bind_vcpu.port = evtchn;
|
||||
bind_vcpu.vcpu = tcpu;
|
||||
|
||||
/*
|
||||
* If this fails, it usually just indicates that we're dealing with a
|
||||
* virq or IPI channel, which don't actually need to be rebound. Ignore
|
||||
* it, but don't do the xenlinux-level rebind in that case.
|
||||
*/
|
||||
if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_vcpu, &bind_vcpu) >= 0)
|
||||
bind_evtchn_to_cpu(evtchn, tcpu);
|
||||
}
|
||||
|
||||
|
||||
static void set_affinity_irq(unsigned irq, cpumask_t dest)
|
||||
{
|
||||
unsigned tcpu = first_cpu(dest);
|
||||
rebind_irq_to_cpu(irq, tcpu);
|
||||
}
|
||||
|
||||
static void enable_dynirq(unsigned int irq)
|
||||
{
|
||||
int evtchn = evtchn_from_irq(irq);
|
||||
|
||||
if (VALID_EVTCHN(evtchn))
|
||||
unmask_evtchn(evtchn);
|
||||
}
|
||||
|
||||
static void disable_dynirq(unsigned int irq)
|
||||
{
|
||||
int evtchn = evtchn_from_irq(irq);
|
||||
|
||||
if (VALID_EVTCHN(evtchn))
|
||||
mask_evtchn(evtchn);
|
||||
}
|
||||
|
||||
static void ack_dynirq(unsigned int irq)
|
||||
{
|
||||
int evtchn = evtchn_from_irq(irq);
|
||||
|
||||
move_native_irq(irq);
|
||||
|
||||
if (VALID_EVTCHN(evtchn))
|
||||
clear_evtchn(evtchn);
|
||||
}
|
||||
|
||||
static int retrigger_dynirq(unsigned int irq)
|
||||
{
|
||||
int evtchn = evtchn_from_irq(irq);
|
||||
int ret = 0;
|
||||
|
||||
if (VALID_EVTCHN(evtchn)) {
|
||||
set_evtchn(evtchn);
|
||||
ret = 1;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static struct irq_chip xen_dynamic_chip __read_mostly = {
|
||||
.name = "xen-dyn",
|
||||
.mask = disable_dynirq,
|
||||
.unmask = enable_dynirq,
|
||||
.ack = ack_dynirq,
|
||||
.set_affinity = set_affinity_irq,
|
||||
.retrigger = retrigger_dynirq,
|
||||
};
|
||||
|
||||
void __init xen_init_IRQ(void)
|
||||
{
|
||||
int i;
|
||||
|
||||
init_evtchn_cpu_bindings();
|
||||
|
||||
/* No event channels are 'live' right now. */
|
||||
for (i = 0; i < NR_EVENT_CHANNELS; i++)
|
||||
mask_evtchn(i);
|
||||
|
||||
/* Dynamic IRQ space is currently unbound. Zero the refcnts. */
|
||||
for (i = 0; i < NR_IRQS; i++)
|
||||
irq_bindcount[i] = 0;
|
||||
|
||||
irq_ctx_init(smp_processor_id());
|
||||
}
|
|
@ -0,0 +1,29 @@
|
|||
/******************************************************************************
|
||||
* features.c
|
||||
*
|
||||
* Xen feature flags.
|
||||
*
|
||||
* Copyright (c) 2006, Ian Campbell, XenSource Inc.
|
||||
*/
|
||||
#include <linux/types.h>
|
||||
#include <linux/cache.h>
|
||||
#include <linux/module.h>
|
||||
#include <asm/xen/hypervisor.h>
|
||||
#include <xen/features.h>
|
||||
|
||||
u8 xen_features[XENFEAT_NR_SUBMAPS * 32] __read_mostly;
|
||||
EXPORT_SYMBOL_GPL(xen_features);
|
||||
|
||||
void xen_setup_features(void)
|
||||
{
|
||||
struct xen_feature_info fi;
|
||||
int i, j;
|
||||
|
||||
for (i = 0; i < XENFEAT_NR_SUBMAPS; i++) {
|
||||
fi.submap_idx = i;
|
||||
if (HYPERVISOR_xen_version(XENVER_get_features, &fi) < 0)
|
||||
break;
|
||||
for (j = 0; j < 32; j++)
|
||||
xen_features[i * 32 + j] = !!(fi.submap & 1<<j);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,143 @@
|
|||
/*
|
||||
* Handle external requests for shutdown, reboot and sysrq
|
||||
*/
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/err.h>
|
||||
#include <linux/reboot.h>
|
||||
#include <linux/sysrq.h>
|
||||
|
||||
#include <xen/xenbus.h>
|
||||
|
||||
#define SHUTDOWN_INVALID -1
|
||||
#define SHUTDOWN_POWEROFF 0
|
||||
#define SHUTDOWN_SUSPEND 2
|
||||
/* Code 3 is SHUTDOWN_CRASH, which we don't use because the domain can only
|
||||
* report a crash, not be instructed to crash!
|
||||
* HALT is the same as POWEROFF, as far as we're concerned. The tools use
|
||||
* the distinction when we return the reason code to them.
|
||||
*/
|
||||
#define SHUTDOWN_HALT 4
|
||||
|
||||
/* Ignore multiple shutdown requests. */
|
||||
static int shutting_down = SHUTDOWN_INVALID;
|
||||
|
||||
static void shutdown_handler(struct xenbus_watch *watch,
|
||||
const char **vec, unsigned int len)
|
||||
{
|
||||
char *str;
|
||||
struct xenbus_transaction xbt;
|
||||
int err;
|
||||
|
||||
if (shutting_down != SHUTDOWN_INVALID)
|
||||
return;
|
||||
|
||||
again:
|
||||
err = xenbus_transaction_start(&xbt);
|
||||
if (err)
|
||||
return;
|
||||
|
||||
str = (char *)xenbus_read(xbt, "control", "shutdown", NULL);
|
||||
/* Ignore read errors and empty reads. */
|
||||
if (XENBUS_IS_ERR_READ(str)) {
|
||||
xenbus_transaction_end(xbt, 1);
|
||||
return;
|
||||
}
|
||||
|
||||
xenbus_write(xbt, "control", "shutdown", "");
|
||||
|
||||
err = xenbus_transaction_end(xbt, 0);
|
||||
if (err == -EAGAIN) {
|
||||
kfree(str);
|
||||
goto again;
|
||||
}
|
||||
|
||||
if (strcmp(str, "poweroff") == 0 ||
|
||||
strcmp(str, "halt") == 0)
|
||||
orderly_poweroff(false);
|
||||
else if (strcmp(str, "reboot") == 0)
|
||||
ctrl_alt_del();
|
||||
else {
|
||||
printk(KERN_INFO "Ignoring shutdown request: %s\n", str);
|
||||
shutting_down = SHUTDOWN_INVALID;
|
||||
}
|
||||
|
||||
kfree(str);
|
||||
}
|
||||
|
||||
static void sysrq_handler(struct xenbus_watch *watch, const char **vec,
|
||||
unsigned int len)
|
||||
{
|
||||
char sysrq_key = '\0';
|
||||
struct xenbus_transaction xbt;
|
||||
int err;
|
||||
|
||||
again:
|
||||
err = xenbus_transaction_start(&xbt);
|
||||
if (err)
|
||||
return;
|
||||
if (!xenbus_scanf(xbt, "control", "sysrq", "%c", &sysrq_key)) {
|
||||
printk(KERN_ERR "Unable to read sysrq code in "
|
||||
"control/sysrq\n");
|
||||
xenbus_transaction_end(xbt, 1);
|
||||
return;
|
||||
}
|
||||
|
||||
if (sysrq_key != '\0')
|
||||
xenbus_printf(xbt, "control", "sysrq", "%c", '\0');
|
||||
|
||||
err = xenbus_transaction_end(xbt, 0);
|
||||
if (err == -EAGAIN)
|
||||
goto again;
|
||||
|
||||
if (sysrq_key != '\0')
|
||||
handle_sysrq(sysrq_key, NULL);
|
||||
}
|
||||
|
||||
static struct xenbus_watch shutdown_watch = {
|
||||
.node = "control/shutdown",
|
||||
.callback = shutdown_handler
|
||||
};
|
||||
|
||||
static struct xenbus_watch sysrq_watch = {
|
||||
.node = "control/sysrq",
|
||||
.callback = sysrq_handler
|
||||
};
|
||||
|
||||
static int setup_shutdown_watcher(void)
|
||||
{
|
||||
int err;
|
||||
|
||||
err = register_xenbus_watch(&shutdown_watch);
|
||||
if (err) {
|
||||
printk(KERN_ERR "Failed to set shutdown watcher\n");
|
||||
return err;
|
||||
}
|
||||
|
||||
err = register_xenbus_watch(&sysrq_watch);
|
||||
if (err) {
|
||||
printk(KERN_ERR "Failed to set sysrq watcher\n");
|
||||
return err;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int shutdown_event(struct notifier_block *notifier,
|
||||
unsigned long event,
|
||||
void *data)
|
||||
{
|
||||
setup_shutdown_watcher();
|
||||
return NOTIFY_DONE;
|
||||
}
|
||||
|
||||
static int __init setup_shutdown_event(void)
|
||||
{
|
||||
static struct notifier_block xenstore_notifier = {
|
||||
.notifier_call = shutdown_event
|
||||
};
|
||||
register_xenstore_notifier(&xenstore_notifier);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
subsys_initcall(setup_shutdown_event);
|
|
@@ -0,0 +1,564 @@
/*
 * Xen mmu operations
 *
 * This file contains the various mmu fetch and update operations.
 * The most important job they must perform is the mapping between the
 * domain's pfn and the overall machine mfns.
 *
 * Xen allows guests to directly update the pagetable, in a controlled
 * fashion.  In other words, the guest modifies the same pagetable
 * that the CPU actually uses, which eliminates the overhead of having
 * a separate shadow pagetable.
 *
 * In order to allow this, it falls on the guest domain to map its
 * notion of a "physical" pfn - which is just a domain-local linear
 * address - into a real "machine address" which the CPU's MMU can
 * use.
 *
 * A pgd_t/pmd_t/pte_t will typically contain an mfn, and so can be
 * inserted directly into the pagetable.  When creating a new
 * pte/pmd/pgd, it converts the passed pfn into an mfn.  Conversely,
 * when reading the content back with __(pgd|pmd|pte)_val, it converts
 * the mfn back into a pfn.
 *
 * The other constraint is that all pages which make up a pagetable
 * must be mapped read-only in the guest.  This prevents uncontrolled
 * guest updates to the pagetable.  Xen strictly enforces this, and
 * will disallow any pagetable update which will end up mapping a
 * pagetable page RW, and will disallow using any writable page as a
 * pagetable.
 *
 * Naively, when loading %cr3 with the base of a new pagetable, Xen
 * would need to validate the whole pagetable before going on.
 * Naturally, this is quite slow.  The solution is to "pin" a
 * pagetable, which enforces all the constraints on the pagetable even
 * when it is not actively in use.  This means that Xen can be assured
 * that it is still valid when you do load it into %cr3, and doesn't
 * need to revalidate it.
 *
 * Jeremy Fitzhardinge <jeremy@xensource.com>, XenSource Inc, 2007
 */
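The pfn-to-mfn mapping described above is easiest to see as a pair of lookups through the domain's physical-to-machine table. A purely illustrative sketch, assuming the pfn_to_mfn()/mfn_to_pfn() helpers from the Xen page header (pfn_to_mfn is also used by xen_pgd_pin() later in this file); the frame numbers are invented:

    /* Suppose the guest's "physical" frame 0x1234 is backed by machine
     * frame 0x8765; the values below are made up for illustration. */
    unsigned long mfn = pfn_to_mfn(0x1234);	/* -> 0x8765, via the p2m table */
    unsigned long pfn = mfn_to_pfn(mfn);	/* -> 0x1234, via machine_to_phys */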
|
||||
#include <linux/sched.h>
|
||||
#include <linux/highmem.h>
|
||||
#include <linux/bug.h>
|
||||
#include <linux/sched.h>
|
||||
|
||||
#include <asm/pgtable.h>
|
||||
#include <asm/tlbflush.h>
|
||||
#include <asm/mmu_context.h>
|
||||
#include <asm/paravirt.h>
|
||||
|
||||
#include <asm/xen/hypercall.h>
|
||||
#include <asm/xen/hypervisor.h>
|
||||
|
||||
#include <xen/page.h>
|
||||
#include <xen/interface/xen.h>
|
||||
|
||||
#include "multicalls.h"
|
||||
#include "mmu.h"
|
||||
|
||||
xmaddr_t arbitrary_virt_to_machine(unsigned long address)
|
||||
{
|
||||
pte_t *pte = lookup_address(address);
|
||||
unsigned offset = address & PAGE_MASK;
|
||||
|
||||
BUG_ON(pte == NULL);
|
||||
|
||||
return XMADDR((pte_mfn(*pte) << PAGE_SHIFT) + offset);
|
||||
}
|
||||
|
||||
void make_lowmem_page_readonly(void *vaddr)
|
||||
{
|
||||
pte_t *pte, ptev;
|
||||
unsigned long address = (unsigned long)vaddr;
|
||||
|
||||
pte = lookup_address(address);
|
||||
BUG_ON(pte == NULL);
|
||||
|
||||
ptev = pte_wrprotect(*pte);
|
||||
|
||||
if (HYPERVISOR_update_va_mapping(address, ptev, 0))
|
||||
BUG();
|
||||
}
|
||||
|
||||
void make_lowmem_page_readwrite(void *vaddr)
|
||||
{
|
||||
pte_t *pte, ptev;
|
||||
unsigned long address = (unsigned long)vaddr;
|
||||
|
||||
pte = lookup_address(address);
|
||||
BUG_ON(pte == NULL);
|
||||
|
||||
ptev = pte_mkwrite(*pte);
|
||||
|
||||
if (HYPERVISOR_update_va_mapping(address, ptev, 0))
|
||||
BUG();
|
||||
}
|
||||
|
||||
|
||||
void xen_set_pmd(pmd_t *ptr, pmd_t val)
|
||||
{
|
||||
struct multicall_space mcs;
|
||||
struct mmu_update *u;
|
||||
|
||||
preempt_disable();
|
||||
|
||||
mcs = xen_mc_entry(sizeof(*u));
|
||||
u = mcs.args;
|
||||
u->ptr = virt_to_machine(ptr).maddr;
|
||||
u->val = pmd_val_ma(val);
|
||||
MULTI_mmu_update(mcs.mc, u, 1, NULL, DOMID_SELF);
|
||||
|
||||
xen_mc_issue(PARAVIRT_LAZY_MMU);
|
||||
|
||||
preempt_enable();
|
||||
}
|
||||
|
||||
/*
|
||||
* Associate a virtual page frame with a given physical page frame
|
||||
* and protection flags for that frame.
|
||||
*/
|
||||
void set_pte_mfn(unsigned long vaddr, unsigned long mfn, pgprot_t flags)
|
||||
{
|
||||
pgd_t *pgd;
|
||||
pud_t *pud;
|
||||
pmd_t *pmd;
|
||||
pte_t *pte;
|
||||
|
||||
pgd = swapper_pg_dir + pgd_index(vaddr);
|
||||
if (pgd_none(*pgd)) {
|
||||
BUG();
|
||||
return;
|
||||
}
|
||||
pud = pud_offset(pgd, vaddr);
|
||||
if (pud_none(*pud)) {
|
||||
BUG();
|
||||
return;
|
||||
}
|
||||
pmd = pmd_offset(pud, vaddr);
|
||||
if (pmd_none(*pmd)) {
|
||||
BUG();
|
||||
return;
|
||||
}
|
||||
pte = pte_offset_kernel(pmd, vaddr);
|
||||
/* <mfn,flags> stored as-is, to permit clearing entries */
|
||||
xen_set_pte(pte, mfn_pte(mfn, flags));
|
||||
|
||||
/*
|
||||
* It's enough to flush this one mapping.
|
||||
* (PGE mappings get flushed as well)
|
||||
*/
|
||||
__flush_tlb_one(vaddr);
|
||||
}
|
||||
|
||||
void xen_set_pte_at(struct mm_struct *mm, unsigned long addr,
|
||||
pte_t *ptep, pte_t pteval)
|
||||
{
|
||||
if (mm == current->mm || mm == &init_mm) {
|
||||
if (xen_get_lazy_mode() == PARAVIRT_LAZY_MMU) {
|
||||
struct multicall_space mcs;
|
||||
mcs = xen_mc_entry(0);
|
||||
|
||||
MULTI_update_va_mapping(mcs.mc, addr, pteval, 0);
|
||||
xen_mc_issue(PARAVIRT_LAZY_MMU);
|
||||
return;
|
||||
} else
|
||||
if (HYPERVISOR_update_va_mapping(addr, pteval, 0) == 0)
|
||||
return;
|
||||
}
|
||||
xen_set_pte(ptep, pteval);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_X86_PAE
|
||||
void xen_set_pud(pud_t *ptr, pud_t val)
|
||||
{
|
||||
struct multicall_space mcs;
|
||||
struct mmu_update *u;
|
||||
|
||||
preempt_disable();
|
||||
|
||||
mcs = xen_mc_entry(sizeof(*u));
|
||||
u = mcs.args;
|
||||
u->ptr = virt_to_machine(ptr).maddr;
|
||||
u->val = pud_val_ma(val);
|
||||
MULTI_mmu_update(mcs.mc, u, 1, NULL, DOMID_SELF);
|
||||
|
||||
xen_mc_issue(PARAVIRT_LAZY_MMU);
|
||||
|
||||
preempt_enable();
|
||||
}
|
||||
|
||||
void xen_set_pte(pte_t *ptep, pte_t pte)
|
||||
{
|
||||
ptep->pte_high = pte.pte_high;
|
||||
smp_wmb();
|
||||
ptep->pte_low = pte.pte_low;
|
||||
}
|
||||
|
||||
void xen_set_pte_atomic(pte_t *ptep, pte_t pte)
|
||||
{
|
||||
set_64bit((u64 *)ptep, pte_val_ma(pte));
|
||||
}
|
||||
|
||||
void xen_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
|
||||
{
|
||||
ptep->pte_low = 0;
|
||||
smp_wmb(); /* make sure low gets written first */
|
||||
ptep->pte_high = 0;
|
||||
}
|
||||
|
||||
void xen_pmd_clear(pmd_t *pmdp)
|
||||
{
|
||||
xen_set_pmd(pmdp, __pmd(0));
|
||||
}
|
||||
|
||||
unsigned long long xen_pte_val(pte_t pte)
|
||||
{
|
||||
unsigned long long ret = 0;
|
||||
|
||||
if (pte.pte_low) {
|
||||
ret = ((unsigned long long)pte.pte_high << 32) | pte.pte_low;
|
||||
ret = machine_to_phys(XMADDR(ret)).paddr | 1;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
unsigned long long xen_pmd_val(pmd_t pmd)
|
||||
{
|
||||
unsigned long long ret = pmd.pmd;
|
||||
if (ret)
|
||||
ret = machine_to_phys(XMADDR(ret)).paddr | 1;
|
||||
return ret;
|
||||
}
|
||||
|
||||
unsigned long long xen_pgd_val(pgd_t pgd)
|
||||
{
|
||||
unsigned long long ret = pgd.pgd;
|
||||
if (ret)
|
||||
ret = machine_to_phys(XMADDR(ret)).paddr | 1;
|
||||
return ret;
|
||||
}
|
||||
|
||||
pte_t xen_make_pte(unsigned long long pte)
|
||||
{
|
||||
if (pte & 1)
|
||||
pte = phys_to_machine(XPADDR(pte)).maddr;
|
||||
|
||||
return (pte_t){ pte, pte >> 32 };
|
||||
}
|
||||
|
||||
pmd_t xen_make_pmd(unsigned long long pmd)
|
||||
{
|
||||
if (pmd & 1)
|
||||
pmd = phys_to_machine(XPADDR(pmd)).maddr;
|
||||
|
||||
return (pmd_t){ pmd };
|
||||
}
|
||||
|
||||
pgd_t xen_make_pgd(unsigned long long pgd)
|
||||
{
|
||||
if (pgd & _PAGE_PRESENT)
|
||||
pgd = phys_to_machine(XPADDR(pgd)).maddr;
|
||||
|
||||
return (pgd_t){ pgd };
|
||||
}
|
||||
#else /* !PAE */
|
||||
void xen_set_pte(pte_t *ptep, pte_t pte)
|
||||
{
|
||||
*ptep = pte;
|
||||
}
|
||||
|
||||
unsigned long xen_pte_val(pte_t pte)
|
||||
{
|
||||
unsigned long ret = pte.pte_low;
|
||||
|
||||
if (ret & _PAGE_PRESENT)
|
||||
ret = machine_to_phys(XMADDR(ret)).paddr;
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
unsigned long xen_pgd_val(pgd_t pgd)
|
||||
{
|
||||
unsigned long ret = pgd.pgd;
|
||||
if (ret)
|
||||
ret = machine_to_phys(XMADDR(ret)).paddr | 1;
|
||||
return ret;
|
||||
}
|
||||
|
||||
pte_t xen_make_pte(unsigned long pte)
|
||||
{
|
||||
if (pte & _PAGE_PRESENT)
|
||||
pte = phys_to_machine(XPADDR(pte)).maddr;
|
||||
|
||||
return (pte_t){ pte };
|
||||
}
|
||||
|
||||
pgd_t xen_make_pgd(unsigned long pgd)
|
||||
{
|
||||
if (pgd & _PAGE_PRESENT)
|
||||
pgd = phys_to_machine(XPADDR(pgd)).maddr;
|
||||
|
||||
return (pgd_t){ pgd };
|
||||
}
|
||||
#endif /* CONFIG_X86_PAE */
|
||||
|
||||
|
||||
|
||||
/*
|
||||
(Yet another) pagetable walker. This one is intended for pinning a
|
||||
pagetable. This means that it walks a pagetable and calls the
|
||||
callback function on each page it finds making up the page table,
|
||||
at every level. It walks the entire pagetable, but it only bothers
|
||||
pinning pte pages which are below pte_limit. In the normal case
|
||||
this will be TASK_SIZE, but at boot we need to pin up to
|
||||
FIXADDR_TOP. But the important bit is that we don't pin beyond
|
||||
there, because then we start getting into Xen's ptes.
|
||||
*/
|
||||
static int pgd_walk(pgd_t *pgd_base, int (*func)(struct page *, unsigned),
|
||||
unsigned long limit)
|
||||
{
|
||||
pgd_t *pgd = pgd_base;
|
||||
int flush = 0;
|
||||
unsigned long addr = 0;
|
||||
unsigned long pgd_next;
|
||||
|
||||
BUG_ON(limit > FIXADDR_TOP);
|
||||
|
||||
if (xen_feature(XENFEAT_auto_translated_physmap))
|
||||
return 0;
|
||||
|
||||
for (; addr != FIXADDR_TOP; pgd++, addr = pgd_next) {
|
||||
pud_t *pud;
|
||||
unsigned long pud_limit, pud_next;
|
||||
|
||||
pgd_next = pud_limit = pgd_addr_end(addr, FIXADDR_TOP);
|
||||
|
||||
if (!pgd_val(*pgd))
|
||||
continue;
|
||||
|
||||
pud = pud_offset(pgd, 0);
|
||||
|
||||
if (PTRS_PER_PUD > 1) /* not folded */
|
||||
flush |= (*func)(virt_to_page(pud), 0);
|
||||
|
||||
for (; addr != pud_limit; pud++, addr = pud_next) {
|
||||
pmd_t *pmd;
|
||||
unsigned long pmd_limit;
|
||||
|
||||
pud_next = pud_addr_end(addr, pud_limit);
|
||||
|
||||
if (pud_next < limit)
|
||||
pmd_limit = pud_next;
|
||||
else
|
||||
pmd_limit = limit;
|
||||
|
||||
if (pud_none(*pud))
|
||||
continue;
|
||||
|
||||
pmd = pmd_offset(pud, 0);
|
||||
|
||||
if (PTRS_PER_PMD > 1) /* not folded */
|
||||
flush |= (*func)(virt_to_page(pmd), 0);
|
||||
|
||||
for (; addr != pmd_limit; pmd++) {
|
||||
addr += (PAGE_SIZE * PTRS_PER_PTE);
|
||||
if ((pmd_limit-1) < (addr-1)) {
|
||||
addr = pmd_limit;
|
||||
break;
|
||||
}
|
||||
|
||||
if (pmd_none(*pmd))
|
||||
continue;
|
||||
|
||||
flush |= (*func)(pmd_page(*pmd), 0);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
flush |= (*func)(virt_to_page(pgd_base), UVMF_TLB_FLUSH);
|
||||
|
||||
return flush;
|
||||
}
|
||||
|
||||
static int pin_page(struct page *page, unsigned flags)
|
||||
{
|
||||
unsigned pgfl = test_and_set_bit(PG_pinned, &page->flags);
|
||||
int flush;
|
||||
|
||||
if (pgfl)
|
||||
flush = 0; /* already pinned */
|
||||
else if (PageHighMem(page))
|
||||
/* kmaps need flushing if we found an unpinned
|
||||
highpage */
|
||||
flush = 1;
|
||||
else {
|
||||
void *pt = lowmem_page_address(page);
|
||||
unsigned long pfn = page_to_pfn(page);
|
||||
struct multicall_space mcs = __xen_mc_entry(0);
|
||||
|
||||
flush = 0;
|
||||
|
||||
MULTI_update_va_mapping(mcs.mc, (unsigned long)pt,
|
||||
pfn_pte(pfn, PAGE_KERNEL_RO),
|
||||
flags);
|
||||
}
|
||||
|
||||
return flush;
|
||||
}
|
||||
|
||||
/* This is called just after a mm has been created, but it has not
|
||||
been used yet. We need to make sure that its pagetable is all
|
||||
read-only, and can be pinned. */
|
||||
void xen_pgd_pin(pgd_t *pgd)
|
||||
{
|
||||
struct multicall_space mcs;
|
||||
struct mmuext_op *op;
|
||||
|
||||
xen_mc_batch();
|
||||
|
||||
if (pgd_walk(pgd, pin_page, TASK_SIZE)) {
|
||||
/* re-enable interrupts for kmap_flush_unused */
|
||||
xen_mc_issue(0);
|
||||
kmap_flush_unused();
|
||||
xen_mc_batch();
|
||||
}
|
||||
|
||||
mcs = __xen_mc_entry(sizeof(*op));
|
||||
op = mcs.args;
|
||||
|
||||
#ifdef CONFIG_X86_PAE
|
||||
op->cmd = MMUEXT_PIN_L3_TABLE;
|
||||
#else
|
||||
op->cmd = MMUEXT_PIN_L2_TABLE;
|
||||
#endif
|
||||
op->arg1.mfn = pfn_to_mfn(PFN_DOWN(__pa(pgd)));
|
||||
MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF);
|
||||
|
||||
xen_mc_issue(0);
|
||||
}
|
||||
|
||||
/* The init_mm pagetable is really pinned as soon as its created, but
|
||||
that's before we have page structures to store the bits. So do all
|
||||
the book-keeping now. */
|
||||
static __init int mark_pinned(struct page *page, unsigned flags)
|
||||
{
|
||||
SetPagePinned(page);
|
||||
return 0;
|
||||
}
|
||||
|
||||
void __init xen_mark_init_mm_pinned(void)
|
||||
{
|
||||
pgd_walk(init_mm.pgd, mark_pinned, FIXADDR_TOP);
|
||||
}
|
||||
|
||||
static int unpin_page(struct page *page, unsigned flags)
|
||||
{
|
||||
unsigned pgfl = test_and_clear_bit(PG_pinned, &page->flags);
|
||||
|
||||
if (pgfl && !PageHighMem(page)) {
|
||||
void *pt = lowmem_page_address(page);
|
||||
unsigned long pfn = page_to_pfn(page);
|
||||
struct multicall_space mcs = __xen_mc_entry(0);
|
||||
|
||||
MULTI_update_va_mapping(mcs.mc, (unsigned long)pt,
|
||||
pfn_pte(pfn, PAGE_KERNEL),
|
||||
flags);
|
||||
}
|
||||
|
||||
return 0; /* never need to flush on unpin */
|
||||
}
|
||||
|
||||
/* Release a pagetables pages back as normal RW */
|
||||
static void xen_pgd_unpin(pgd_t *pgd)
|
||||
{
|
||||
struct mmuext_op *op;
|
||||
struct multicall_space mcs;
|
||||
|
||||
xen_mc_batch();
|
||||
|
||||
mcs = __xen_mc_entry(sizeof(*op));
|
||||
|
||||
op = mcs.args;
|
||||
op->cmd = MMUEXT_UNPIN_TABLE;
|
||||
op->arg1.mfn = pfn_to_mfn(PFN_DOWN(__pa(pgd)));
|
||||
|
||||
MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF);
|
||||
|
||||
pgd_walk(pgd, unpin_page, TASK_SIZE);
|
||||
|
||||
xen_mc_issue(0);
|
||||
}
|
||||
|
||||
void xen_activate_mm(struct mm_struct *prev, struct mm_struct *next)
|
||||
{
|
||||
spin_lock(&next->page_table_lock);
|
||||
xen_pgd_pin(next->pgd);
|
||||
spin_unlock(&next->page_table_lock);
|
||||
}
|
||||
|
||||
void xen_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm)
|
||||
{
|
||||
spin_lock(&mm->page_table_lock);
|
||||
xen_pgd_pin(mm->pgd);
|
||||
spin_unlock(&mm->page_table_lock);
|
||||
}
|
||||
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
/* Another cpu may still have their %cr3 pointing at the pagetable, so
|
||||
we need to repoint it somewhere else before we can unpin it. */
|
||||
static void drop_other_mm_ref(void *info)
|
||||
{
|
||||
struct mm_struct *mm = info;
|
||||
|
||||
if (__get_cpu_var(cpu_tlbstate).active_mm == mm)
|
||||
leave_mm(smp_processor_id());
|
||||
}
|
||||
|
||||
static void drop_mm_ref(struct mm_struct *mm)
|
||||
{
|
||||
if (current->active_mm == mm) {
|
||||
if (current->mm == mm)
|
||||
load_cr3(swapper_pg_dir);
|
||||
else
|
||||
leave_mm(smp_processor_id());
|
||||
}
|
||||
|
||||
if (!cpus_empty(mm->cpu_vm_mask))
|
||||
xen_smp_call_function_mask(mm->cpu_vm_mask, drop_other_mm_ref,
|
||||
mm, 1);
|
||||
}
|
||||
#else
|
||||
static void drop_mm_ref(struct mm_struct *mm)
|
||||
{
|
||||
if (current->active_mm == mm)
|
||||
load_cr3(swapper_pg_dir);
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* While a process runs, Xen pins its pagetables, which means that the
|
||||
* hypervisor forces it to be read-only, and it controls all updates
|
||||
* to it. This means that all pagetable updates have to go via the
|
||||
* hypervisor, which is moderately expensive.
|
||||
*
|
||||
* Since we're pulling the pagetable down, we switch to use init_mm,
|
||||
* unpin old process pagetable and mark it all read-write, which
|
||||
* allows further operations on it to be simple memory accesses.
|
||||
*
|
||||
* The only subtle point is that another CPU may be still using the
|
||||
* pagetable because of lazy tlb flushing.  This means we need to
|
||||
* switch all CPUs off this pagetable before we can unpin it.
|
||||
*/
|
||||
void xen_exit_mmap(struct mm_struct *mm)
|
||||
{
|
||||
get_cpu(); /* make sure we don't move around */
|
||||
drop_mm_ref(mm);
|
||||
put_cpu();
|
||||
|
||||
spin_lock(&mm->page_table_lock);
|
||||
xen_pgd_unpin(mm->pgd);
|
||||
spin_unlock(&mm->page_table_lock);
|
||||
}
|
|
@ -0,0 +1,60 @@
|
|||
#ifndef _XEN_MMU_H
|
||||
|
||||
#include <linux/linkage.h>
|
||||
#include <asm/page.h>
|
||||
|
||||
/*
|
||||
* Page-directory addresses above 4GB do not fit into architectural %cr3.
|
||||
* When accessing %cr3, or equivalent field in vcpu_guest_context, guests
|
||||
* must use the following accessor macros to pack/unpack valid MFNs.
|
||||
*
|
||||
* Note that Xen is using the fact that the pagetable base is always
|
||||
* page-aligned, and putting the 12 MSB of the address into the 12 LSB
|
||||
* of cr3.
|
||||
*/
|
||||
#define xen_pfn_to_cr3(pfn) (((unsigned)(pfn) << 12) | ((unsigned)(pfn) >> 20))
|
||||
#define xen_cr3_to_pfn(cr3) (((unsigned)(cr3) >> 12) | ((unsigned)(cr3) << 20))
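
/*
 * Illustrative worked example (not part of the original patch), using a
 * hypothetical pfn of 0x123456:
 *
 *	xen_pfn_to_cr3(0x123456):
 *		(unsigned)0x123456 << 12 = 0x23456000	(top bits wrap off)
 *		(unsigned)0x123456 >> 20 = 0x00000001
 *		ORed together		 = 0x23456001
 *
 *	xen_cr3_to_pfn(0x23456001):
 *		0x23456001 >> 12	 = 0x00023456
 *		0x23456001 << 20	 = 0x00100000	(only the low 12 bits survive)
 *		ORed together		 = 0x00123456
 *
 * i.e. the pfn's top 12 bits ride in the always-zero low 12 bits of %cr3
 * and are recovered on the way back.
 */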
|
||||
|
||||
|
||||
void set_pte_mfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags);
|
||||
|
||||
void xen_set_pte(pte_t *ptep, pte_t pteval);
|
||||
void xen_set_pte_at(struct mm_struct *mm, unsigned long addr,
|
||||
pte_t *ptep, pte_t pteval);
|
||||
void xen_set_pmd(pmd_t *pmdp, pmd_t pmdval);
|
||||
|
||||
void xen_activate_mm(struct mm_struct *prev, struct mm_struct *next);
|
||||
void xen_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm);
|
||||
void xen_exit_mmap(struct mm_struct *mm);
|
||||
|
||||
void xen_pgd_pin(pgd_t *pgd);
|
||||
//void xen_pgd_unpin(pgd_t *pgd);
|
||||
|
||||
#ifdef CONFIG_X86_PAE
|
||||
unsigned long long xen_pte_val(pte_t);
|
||||
unsigned long long xen_pmd_val(pmd_t);
|
||||
unsigned long long xen_pgd_val(pgd_t);
|
||||
|
||||
pte_t xen_make_pte(unsigned long long);
|
||||
pmd_t xen_make_pmd(unsigned long long);
|
||||
pgd_t xen_make_pgd(unsigned long long);
|
||||
|
||||
void xen_set_pte_at(struct mm_struct *mm, unsigned long addr,
|
||||
pte_t *ptep, pte_t pteval);
|
||||
void xen_set_pte_atomic(pte_t *ptep, pte_t pte);
|
||||
void xen_set_pud(pud_t *ptr, pud_t val);
|
||||
void xen_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep);
|
||||
void xen_pmd_clear(pmd_t *pmdp);
|
||||
|
||||
|
||||
#else
|
||||
unsigned long xen_pte_val(pte_t);
|
||||
unsigned long xen_pmd_val(pmd_t);
|
||||
unsigned long xen_pgd_val(pgd_t);
|
||||
|
||||
pte_t xen_make_pte(unsigned long);
|
||||
pmd_t xen_make_pmd(unsigned long);
|
||||
pgd_t xen_make_pgd(unsigned long);
|
||||
#endif
|
||||
|
||||
#endif /* _XEN_MMU_H */
|
|
@ -0,0 +1,90 @@
|
|||
/*
|
||||
* Xen hypercall batching.
|
||||
*
|
||||
* Xen allows multiple hypercalls to be issued at once, using the
|
||||
* multicall interface. This allows the cost of trapping into the
|
||||
* hypervisor to be amortized over several calls.
|
||||
*
|
||||
* This file implements a simple interface for multicalls. There's a
|
||||
* per-cpu buffer of outstanding multicalls. When you want to queue a
|
||||
* multicall for issuing, you can allocate a multicall slot for the
|
||||
* call and its arguments, along with storage for data which is
|
||||
* pointed to by the arguments (for passing pointers to structures,
|
||||
* etc). When the multicall is actually issued, all the space for the
|
||||
* commands and allocated memory is freed for reuse.
|
||||
*
|
||||
* Multicalls are flushed whenever any of the buffers get full, or
|
||||
* when explicitly requested. There's no way to get per-multicall
|
||||
* return results back. It will BUG if any of the multicalls fail.
|
||||
*
|
||||
* Jeremy Fitzhardinge <jeremy@xensource.com>, XenSource Inc, 2007
|
||||
*/
|
||||
#include <linux/percpu.h>
|
||||
#include <linux/hardirq.h>
|
||||
|
||||
#include <asm/xen/hypercall.h>
|
||||
|
||||
#include "multicalls.h"
|
||||
|
||||
#define MC_BATCH 32
|
||||
#define MC_ARGS (MC_BATCH * 16 / sizeof(u64))
|
||||
|
||||
struct mc_buffer {
|
||||
struct multicall_entry entries[MC_BATCH];
|
||||
u64 args[MC_ARGS];
|
||||
unsigned mcidx, argidx;
|
||||
};
|
||||
|
||||
static DEFINE_PER_CPU(struct mc_buffer, mc_buffer);
|
||||
DEFINE_PER_CPU(unsigned long, xen_mc_irq_flags);
|
||||
|
||||
void xen_mc_flush(void)
|
||||
{
|
||||
struct mc_buffer *b = &__get_cpu_var(mc_buffer);
|
||||
int ret = 0;
|
||||
unsigned long flags;
|
||||
|
||||
BUG_ON(preemptible());
|
||||
|
||||
/* Disable interrupts in case someone comes in and queues
|
||||
something in the middle */
|
||||
local_irq_save(flags);
|
||||
|
||||
if (b->mcidx) {
|
||||
int i;
|
||||
|
||||
if (HYPERVISOR_multicall(b->entries, b->mcidx) != 0)
|
||||
BUG();
|
||||
for (i = 0; i < b->mcidx; i++)
|
||||
if (b->entries[i].result < 0)
|
||||
ret++;
|
||||
b->mcidx = 0;
|
||||
b->argidx = 0;
|
||||
} else
|
||||
BUG_ON(b->argidx != 0);
|
||||
|
||||
local_irq_restore(flags);
|
||||
|
||||
BUG_ON(ret);
|
||||
}
|
||||
|
||||
struct multicall_space __xen_mc_entry(size_t args)
|
||||
{
|
||||
struct mc_buffer *b = &__get_cpu_var(mc_buffer);
|
||||
struct multicall_space ret;
|
||||
unsigned argspace = (args + sizeof(u64) - 1) / sizeof(u64);
|
||||
|
||||
BUG_ON(preemptible());
|
||||
BUG_ON(argspace > MC_ARGS);
|
||||
|
||||
if (b->mcidx == MC_BATCH ||
|
||||
(b->argidx + argspace) > MC_ARGS)
|
||||
xen_mc_flush();
|
||||
|
||||
ret.mc = &b->entries[b->mcidx];
|
||||
b->mcidx++;
|
||||
ret.args = &b->args[b->argidx];
|
||||
b->argidx += argspace;
|
||||
|
||||
return ret;
|
||||
}
|
|
@ -0,0 +1,45 @@
|
|||
#ifndef _XEN_MULTICALLS_H
|
||||
#define _XEN_MULTICALLS_H
|
||||
|
||||
#include "xen-ops.h"
|
||||
|
||||
/* Multicalls */
|
||||
struct multicall_space
|
||||
{
|
||||
struct multicall_entry *mc;
|
||||
void *args;
|
||||
};
|
||||
|
||||
/* Allocate room for a multicall and its args */
|
||||
struct multicall_space __xen_mc_entry(size_t args);
|
||||
|
||||
DECLARE_PER_CPU(unsigned long, xen_mc_irq_flags);
|
||||
|
||||
/* Call to start a batch of multiple __xen_mc_entry()s. Must be
|
||||
paired with xen_mc_issue() */
|
||||
static inline void xen_mc_batch(void)
|
||||
{
|
||||
/* need to disable interrupts until this entry is complete */
|
||||
local_irq_save(__get_cpu_var(xen_mc_irq_flags));
|
||||
}
|
||||
|
||||
static inline struct multicall_space xen_mc_entry(size_t args)
|
||||
{
|
||||
xen_mc_batch();
|
||||
return __xen_mc_entry(args);
|
||||
}
|
||||
|
||||
/* Flush all pending multicalls */
|
||||
void xen_mc_flush(void);
|
||||
|
||||
/* Issue a multicall if we're not in a lazy mode */
|
||||
static inline void xen_mc_issue(unsigned mode)
|
||||
{
|
||||
if ((xen_get_lazy_mode() & mode) == 0)
|
||||
xen_mc_flush();
|
||||
|
||||
/* restore flags saved in xen_mc_batch */
|
||||
local_irq_restore(x86_read_percpu(xen_mc_irq_flags));
|
||||
}
|
||||
|
||||
#endif /* _XEN_MULTICALLS_H */
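
/*
 * Illustrative usage sketch (not part of the original patch): callers
 * follow the batch -> entry -> issue pattern, exactly as xen_pgd_unpin()
 * in mmu.c does.  xen_mc_batch() saves flags and disables interrupts,
 * __xen_mc_entry() reserves one multicall entry plus argument space, and
 * xen_mc_issue() flushes unless we're in a lazy mode:
 *
 *	struct multicall_space mcs;
 *	struct mmuext_op *op;
 *
 *	xen_mc_batch();
 *	mcs = __xen_mc_entry(sizeof(*op));
 *	op = mcs.args;
 *	op->cmd = MMUEXT_UNPIN_TABLE;
 *	op->arg1.mfn = pfn_to_mfn(PFN_DOWN(__pa(pgd)));
 *	MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF);
 *	xen_mc_issue(0);
 */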
|
|
@ -0,0 +1,96 @@
|
|||
/*
|
||||
* Machine specific setup for xen
|
||||
*
|
||||
* Jeremy Fitzhardinge <jeremy@xensource.com>, XenSource Inc, 2007
|
||||
*/
|
||||
|
||||
#include <linux/module.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/mm.h>
|
||||
#include <linux/pm.h>
|
||||
|
||||
#include <asm/elf.h>
|
||||
#include <asm/e820.h>
|
||||
#include <asm/setup.h>
|
||||
#include <asm/xen/hypervisor.h>
|
||||
#include <asm/xen/hypercall.h>
|
||||
|
||||
#include <xen/interface/physdev.h>
|
||||
#include <xen/features.h>
|
||||
|
||||
#include "xen-ops.h"
|
||||
|
||||
/* These are code, but not functions. Defined in entry.S */
|
||||
extern const char xen_hypervisor_callback[];
|
||||
extern const char xen_failsafe_callback[];
|
||||
|
||||
unsigned long *phys_to_machine_mapping;
|
||||
EXPORT_SYMBOL(phys_to_machine_mapping);
|
||||
|
||||
/**
|
||||
* machine_specific_memory_setup - Hook for machine specific memory setup.
|
||||
**/
|
||||
|
||||
char * __init xen_memory_setup(void)
|
||||
{
|
||||
unsigned long max_pfn = xen_start_info->nr_pages;
|
||||
|
||||
e820.nr_map = 0;
|
||||
add_memory_region(0, PFN_PHYS(max_pfn), E820_RAM);
|
||||
|
||||
return "Xen";
|
||||
}
|
||||
|
||||
static void xen_idle(void)
|
||||
{
|
||||
local_irq_disable();
|
||||
|
||||
if (need_resched())
|
||||
local_irq_enable();
|
||||
else {
|
||||
current_thread_info()->status &= ~TS_POLLING;
|
||||
smp_mb__after_clear_bit();
|
||||
safe_halt();
|
||||
current_thread_info()->status |= TS_POLLING;
|
||||
}
|
||||
}
|
||||
|
||||
void __init xen_arch_setup(void)
|
||||
{
|
||||
struct physdev_set_iopl set_iopl;
|
||||
int rc;
|
||||
|
||||
HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_4gb_segments);
|
||||
HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_writable_pagetables);
|
||||
|
||||
if (!xen_feature(XENFEAT_auto_translated_physmap))
|
||||
HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_pae_extended_cr3);
|
||||
|
||||
HYPERVISOR_set_callbacks(__KERNEL_CS, (unsigned long)xen_hypervisor_callback,
|
||||
__KERNEL_CS, (unsigned long)xen_failsafe_callback);
|
||||
|
||||
set_iopl.iopl = 1;
|
||||
rc = HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl);
|
||||
if (rc != 0)
|
||||
printk(KERN_INFO "physdev_op failed %d\n", rc);
|
||||
|
||||
#ifdef CONFIG_ACPI
|
||||
if (!(xen_start_info->flags & SIF_INITDOMAIN)) {
|
||||
printk(KERN_INFO "ACPI in unprivileged domain disabled\n");
|
||||
disable_acpi();
|
||||
}
|
||||
#endif
|
||||
|
||||
memcpy(boot_command_line, xen_start_info->cmd_line,
|
||||
MAX_GUEST_CMDLINE > COMMAND_LINE_SIZE ?
|
||||
COMMAND_LINE_SIZE : MAX_GUEST_CMDLINE);
|
||||
|
||||
pm_idle = xen_idle;
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
/* fill cpus_possible with all available cpus */
|
||||
xen_fill_possible_map();
|
||||
#endif
|
||||
|
||||
paravirt_disable_iospace();
|
||||
}
|
|
@ -0,0 +1,404 @@
|
|||
/*
|
||||
* Xen SMP support
|
||||
*
|
||||
* This file implements the Xen versions of smp_ops. SMP under Xen is
|
||||
* very straightforward. Bringing a CPU up is simply a matter of
|
||||
* loading its initial context and setting it running.
|
||||
*
|
||||
* IPIs are handled through the Xen event mechanism.
|
||||
*
|
||||
* Because virtual CPUs can be scheduled onto any real CPU, there's no
|
||||
* useful topology information for the kernel to make use of. As a
|
||||
* result, all CPUs are treated as if they're single-core and
|
||||
* single-threaded.
|
||||
*
|
||||
* This does not handle HOTPLUG_CPU yet.
|
||||
*/
|
||||
#include <linux/sched.h>
|
||||
#include <linux/err.h>
|
||||
#include <linux/smp.h>
|
||||
|
||||
#include <asm/paravirt.h>
|
||||
#include <asm/desc.h>
|
||||
#include <asm/pgtable.h>
|
||||
#include <asm/cpu.h>
|
||||
|
||||
#include <xen/interface/xen.h>
|
||||
#include <xen/interface/vcpu.h>
|
||||
|
||||
#include <asm/xen/interface.h>
|
||||
#include <asm/xen/hypercall.h>
|
||||
|
||||
#include <xen/page.h>
|
||||
#include <xen/events.h>
|
||||
|
||||
#include "xen-ops.h"
|
||||
#include "mmu.h"
|
||||
|
||||
static cpumask_t cpu_initialized_map;
|
||||
static DEFINE_PER_CPU(int, resched_irq);
|
||||
static DEFINE_PER_CPU(int, callfunc_irq);
|
||||
|
||||
/*
|
||||
* Structure and data for smp_call_function(). This is designed to minimise
|
||||
* static memory requirements. It also looks cleaner.
|
||||
*/
|
||||
static DEFINE_SPINLOCK(call_lock);
|
||||
|
||||
struct call_data_struct {
|
||||
void (*func) (void *info);
|
||||
void *info;
|
||||
atomic_t started;
|
||||
atomic_t finished;
|
||||
int wait;
|
||||
};
|
||||
|
||||
static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id);
|
||||
|
||||
static struct call_data_struct *call_data;
|
||||
|
||||
/*
|
||||
* Reschedule call back. Nothing to do,
|
||||
* all the work is done automatically when
|
||||
* we return from the interrupt.
|
||||
*/
|
||||
static irqreturn_t xen_reschedule_interrupt(int irq, void *dev_id)
|
||||
{
|
||||
return IRQ_HANDLED;
|
||||
}
|
||||
|
||||
static __cpuinit void cpu_bringup_and_idle(void)
|
||||
{
|
||||
int cpu = smp_processor_id();
|
||||
|
||||
cpu_init();
|
||||
|
||||
preempt_disable();
|
||||
per_cpu(cpu_state, cpu) = CPU_ONLINE;
|
||||
|
||||
xen_setup_cpu_clockevents();
|
||||
|
||||
/* We can take interrupts now: we're officially "up". */
|
||||
local_irq_enable();
|
||||
|
||||
wmb(); /* make sure everything is out */
|
||||
cpu_idle();
|
||||
}
|
||||
|
||||
static int xen_smp_intr_init(unsigned int cpu)
|
||||
{
|
||||
int rc;
|
||||
const char *resched_name, *callfunc_name;
|
||||
|
||||
per_cpu(resched_irq, cpu) = per_cpu(callfunc_irq, cpu) = -1;
|
||||
|
||||
resched_name = kasprintf(GFP_KERNEL, "resched%d", cpu);
|
||||
rc = bind_ipi_to_irqhandler(XEN_RESCHEDULE_VECTOR,
|
||||
cpu,
|
||||
xen_reschedule_interrupt,
|
||||
IRQF_DISABLED|IRQF_PERCPU|IRQF_NOBALANCING,
|
||||
resched_name,
|
||||
NULL);
|
||||
if (rc < 0)
|
||||
goto fail;
|
||||
per_cpu(resched_irq, cpu) = rc;
|
||||
|
||||
callfunc_name = kasprintf(GFP_KERNEL, "callfunc%d", cpu);
|
||||
rc = bind_ipi_to_irqhandler(XEN_CALL_FUNCTION_VECTOR,
|
||||
cpu,
|
||||
xen_call_function_interrupt,
|
||||
IRQF_DISABLED|IRQF_PERCPU|IRQF_NOBALANCING,
|
||||
callfunc_name,
|
||||
NULL);
|
||||
if (rc < 0)
|
||||
goto fail;
|
||||
per_cpu(callfunc_irq, cpu) = rc;
|
||||
|
||||
return 0;
|
||||
|
||||
fail:
|
||||
if (per_cpu(resched_irq, cpu) >= 0)
|
||||
unbind_from_irqhandler(per_cpu(resched_irq, cpu), NULL);
|
||||
if (per_cpu(callfunc_irq, cpu) >= 0)
|
||||
unbind_from_irqhandler(per_cpu(callfunc_irq, cpu), NULL);
|
||||
return rc;
|
||||
}
|
||||
|
||||
void __init xen_fill_possible_map(void)
|
||||
{
|
||||
int i, rc;
|
||||
|
||||
for (i = 0; i < NR_CPUS; i++) {
|
||||
rc = HYPERVISOR_vcpu_op(VCPUOP_is_up, i, NULL);
|
||||
if (rc >= 0)
|
||||
cpu_set(i, cpu_possible_map);
|
||||
}
|
||||
}
|
||||
|
||||
void __init xen_smp_prepare_boot_cpu(void)
|
||||
{
|
||||
int cpu;
|
||||
|
||||
BUG_ON(smp_processor_id() != 0);
|
||||
native_smp_prepare_boot_cpu();
|
||||
|
||||
/* We've switched to the "real" per-cpu gdt, so make sure the
|
||||
old memory can be recycled */
|
||||
make_lowmem_page_readwrite(&per_cpu__gdt_page);
|
||||
|
||||
for (cpu = 0; cpu < NR_CPUS; cpu++) {
|
||||
cpus_clear(cpu_sibling_map[cpu]);
|
||||
cpus_clear(cpu_core_map[cpu]);
|
||||
}
|
||||
|
||||
xen_setup_vcpu_info_placement();
|
||||
}
|
||||
|
||||
void __init xen_smp_prepare_cpus(unsigned int max_cpus)
|
||||
{
|
||||
unsigned cpu;
|
||||
|
||||
for (cpu = 0; cpu < NR_CPUS; cpu++) {
|
||||
cpus_clear(cpu_sibling_map[cpu]);
|
||||
cpus_clear(cpu_core_map[cpu]);
|
||||
}
|
||||
|
||||
smp_store_cpu_info(0);
|
||||
set_cpu_sibling_map(0);
|
||||
|
||||
if (xen_smp_intr_init(0))
|
||||
BUG();
|
||||
|
||||
cpu_initialized_map = cpumask_of_cpu(0);
|
||||
|
||||
/* Restrict the possible_map according to max_cpus. */
|
||||
while ((num_possible_cpus() > 1) && (num_possible_cpus() > max_cpus)) {
|
||||
for (cpu = NR_CPUS-1; !cpu_isset(cpu, cpu_possible_map); cpu--)
|
||||
continue;
|
||||
cpu_clear(cpu, cpu_possible_map);
|
||||
}
|
||||
|
||||
for_each_possible_cpu (cpu) {
|
||||
struct task_struct *idle;
|
||||
|
||||
if (cpu == 0)
|
||||
continue;
|
||||
|
||||
idle = fork_idle(cpu);
|
||||
if (IS_ERR(idle))
|
||||
panic("failed fork for CPU %d", cpu);
|
||||
|
||||
cpu_set(cpu, cpu_present_map);
|
||||
}
|
||||
|
||||
//init_xenbus_allowed_cpumask();
|
||||
}
|
||||
|
||||
static __cpuinit int
|
||||
cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
|
||||
{
|
||||
struct vcpu_guest_context *ctxt;
|
||||
struct gdt_page *gdt = &per_cpu(gdt_page, cpu);
|
||||
|
||||
if (cpu_test_and_set(cpu, cpu_initialized_map))
|
||||
return 0;
|
||||
|
||||
ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL);
|
||||
if (ctxt == NULL)
|
||||
return -ENOMEM;
|
||||
|
||||
ctxt->flags = VGCF_IN_KERNEL;
|
||||
ctxt->user_regs.ds = __USER_DS;
|
||||
ctxt->user_regs.es = __USER_DS;
|
||||
ctxt->user_regs.fs = __KERNEL_PERCPU;
|
||||
ctxt->user_regs.gs = 0;
|
||||
ctxt->user_regs.ss = __KERNEL_DS;
|
||||
ctxt->user_regs.eip = (unsigned long)cpu_bringup_and_idle;
|
||||
ctxt->user_regs.eflags = 0x1000; /* IOPL_RING1 */
|
||||
|
||||
memset(&ctxt->fpu_ctxt, 0, sizeof(ctxt->fpu_ctxt));
|
||||
|
||||
xen_copy_trap_info(ctxt->trap_ctxt);
|
||||
|
||||
ctxt->ldt_ents = 0;
|
||||
|
||||
BUG_ON((unsigned long)gdt->gdt & ~PAGE_MASK);
|
||||
make_lowmem_page_readonly(gdt->gdt);
|
||||
|
||||
ctxt->gdt_frames[0] = virt_to_mfn(gdt->gdt);
|
||||
ctxt->gdt_ents = ARRAY_SIZE(gdt->gdt);
|
||||
|
||||
ctxt->user_regs.cs = __KERNEL_CS;
|
||||
ctxt->user_regs.esp = idle->thread.esp0 - sizeof(struct pt_regs);
|
||||
|
||||
ctxt->kernel_ss = __KERNEL_DS;
|
||||
ctxt->kernel_sp = idle->thread.esp0;
|
||||
|
||||
ctxt->event_callback_cs = __KERNEL_CS;
|
||||
ctxt->event_callback_eip = (unsigned long)xen_hypervisor_callback;
|
||||
ctxt->failsafe_callback_cs = __KERNEL_CS;
|
||||
ctxt->failsafe_callback_eip = (unsigned long)xen_failsafe_callback;
|
||||
|
||||
per_cpu(xen_cr3, cpu) = __pa(swapper_pg_dir);
|
||||
ctxt->ctrlreg[3] = xen_pfn_to_cr3(virt_to_mfn(swapper_pg_dir));
|
||||
|
||||
if (HYPERVISOR_vcpu_op(VCPUOP_initialise, cpu, ctxt))
|
||||
BUG();
|
||||
|
||||
kfree(ctxt);
|
||||
return 0;
|
||||
}
|
||||
|
||||
int __cpuinit xen_cpu_up(unsigned int cpu)
|
||||
{
|
||||
struct task_struct *idle = idle_task(cpu);
|
||||
int rc;
|
||||
|
||||
#if 0
|
||||
rc = cpu_up_check(cpu);
|
||||
if (rc)
|
||||
return rc;
|
||||
#endif
|
||||
|
||||
init_gdt(cpu);
|
||||
per_cpu(current_task, cpu) = idle;
|
||||
irq_ctx_init(cpu);
|
||||
xen_setup_timer(cpu);
|
||||
|
||||
/* make sure interrupts start blocked */
|
||||
per_cpu(xen_vcpu, cpu)->evtchn_upcall_mask = 1;
|
||||
|
||||
rc = cpu_initialize_context(cpu, idle);
|
||||
if (rc)
|
||||
return rc;
|
||||
|
||||
if (num_online_cpus() == 1)
|
||||
alternatives_smp_switch(1);
|
||||
|
||||
rc = xen_smp_intr_init(cpu);
|
||||
if (rc)
|
||||
return rc;
|
||||
|
||||
smp_store_cpu_info(cpu);
|
||||
set_cpu_sibling_map(cpu);
|
||||
/* This must be done before setting cpu_online_map */
|
||||
wmb();
|
||||
|
||||
cpu_set(cpu, cpu_online_map);
|
||||
|
||||
rc = HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL);
|
||||
BUG_ON(rc);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void xen_smp_cpus_done(unsigned int max_cpus)
|
||||
{
|
||||
}
|
||||
|
||||
static void stop_self(void *v)
|
||||
{
|
||||
int cpu = smp_processor_id();
|
||||
|
||||
/* make sure we're not pinning something down */
|
||||
load_cr3(swapper_pg_dir);
|
||||
/* should set up a minimal gdt */
|
||||
|
||||
HYPERVISOR_vcpu_op(VCPUOP_down, cpu, NULL);
|
||||
BUG();
|
||||
}
|
||||
|
||||
void xen_smp_send_stop(void)
|
||||
{
|
||||
smp_call_function(stop_self, NULL, 0, 0);
|
||||
}
|
||||
|
||||
void xen_smp_send_reschedule(int cpu)
|
||||
{
|
||||
xen_send_IPI_one(cpu, XEN_RESCHEDULE_VECTOR);
|
||||
}
|
||||
|
||||
|
||||
static void xen_send_IPI_mask(cpumask_t mask, enum ipi_vector vector)
|
||||
{
|
||||
unsigned cpu;
|
||||
|
||||
cpus_and(mask, mask, cpu_online_map);
|
||||
|
||||
for_each_cpu_mask(cpu, mask)
|
||||
xen_send_IPI_one(cpu, vector);
|
||||
}
|
||||
|
||||
static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id)
|
||||
{
|
||||
void (*func) (void *info) = call_data->func;
|
||||
void *info = call_data->info;
|
||||
int wait = call_data->wait;
|
||||
|
||||
/*
|
||||
* Notify initiating CPU that I've grabbed the data and am
|
||||
* about to execute the function
|
||||
*/
|
||||
mb();
|
||||
atomic_inc(&call_data->started);
|
||||
/*
|
||||
* At this point the info structure may be out of scope unless wait==1
|
||||
*/
|
||||
irq_enter();
|
||||
(*func)(info);
|
||||
irq_exit();
|
||||
|
||||
if (wait) {
|
||||
mb(); /* commit everything before setting finished */
|
||||
atomic_inc(&call_data->finished);
|
||||
}
|
||||
|
||||
return IRQ_HANDLED;
|
||||
}
|
||||
|
||||
int xen_smp_call_function_mask(cpumask_t mask, void (*func)(void *),
|
||||
void *info, int wait)
|
||||
{
|
||||
struct call_data_struct data;
|
||||
int cpus;
|
||||
|
||||
/* Holding any lock stops cpus from going down. */
|
||||
spin_lock(&call_lock);
|
||||
|
||||
cpu_clear(smp_processor_id(), mask);
|
||||
|
||||
cpus = cpus_weight(mask);
|
||||
if (!cpus) {
|
||||
spin_unlock(&call_lock);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Can deadlock when called with interrupts disabled */
|
||||
WARN_ON(irqs_disabled());
|
||||
|
||||
data.func = func;
|
||||
data.info = info;
|
||||
atomic_set(&data.started, 0);
|
||||
data.wait = wait;
|
||||
if (wait)
|
||||
atomic_set(&data.finished, 0);
|
||||
|
||||
call_data = &data;
|
||||
mb(); /* write everything before IPI */
|
||||
|
||||
/* Send a message to other CPUs and wait for them to respond */
|
||||
xen_send_IPI_mask(mask, XEN_CALL_FUNCTION_VECTOR);
|
||||
|
||||
/* Make sure other vcpus get a chance to run.
|
||||
XXX too severe? Maybe we should check the other CPUs' states? */
|
||||
HYPERVISOR_sched_op(SCHEDOP_yield, 0);
|
||||
|
||||
/* Wait for response */
|
||||
while (atomic_read(&data.started) != cpus ||
|
||||
(wait && atomic_read(&data.finished) != cpus))
|
||||
cpu_relax();
|
||||
|
||||
spin_unlock(&call_lock);
|
||||
|
||||
return 0;
|
||||
}
|
|
@ -0,0 +1,590 @@
|
|||
/*
|
||||
* Xen time implementation.
|
||||
*
|
||||
* This is implemented in terms of a clocksource driver which uses
|
||||
* the hypervisor clock as a nanosecond timebase, and a clockevent
|
||||
* driver which uses the hypervisor's timer mechanism.
|
||||
*
|
||||
* Jeremy Fitzhardinge <jeremy@xensource.com>, XenSource Inc, 2007
|
||||
*/
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/interrupt.h>
|
||||
#include <linux/clocksource.h>
|
||||
#include <linux/clockchips.h>
|
||||
#include <linux/kernel_stat.h>
|
||||
|
||||
#include <asm/xen/hypervisor.h>
|
||||
#include <asm/xen/hypercall.h>
|
||||
|
||||
#include <xen/events.h>
|
||||
#include <xen/interface/xen.h>
|
||||
#include <xen/interface/vcpu.h>
|
||||
|
||||
#include "xen-ops.h"
|
||||
|
||||
#define XEN_SHIFT 22
|
||||
|
||||
/* Xen may fire a timer up to this many ns early */
|
||||
#define TIMER_SLOP 100000
|
||||
#define NS_PER_TICK (1000000000LL / HZ)
|
||||
|
||||
static cycle_t xen_clocksource_read(void);
|
||||
|
||||
/* These are periodically updated in shared_info, and then copied here. */
|
||||
struct shadow_time_info {
|
||||
u64 tsc_timestamp; /* TSC at last update of time vals. */
|
||||
u64 system_timestamp; /* Time, in nanosecs, since boot. */
|
||||
u32 tsc_to_nsec_mul;
|
||||
int tsc_shift;
|
||||
u32 version;
|
||||
};
|
||||
|
||||
static DEFINE_PER_CPU(struct shadow_time_info, shadow_time);
|
||||
|
||||
/* runstate info updated by Xen */
|
||||
static DEFINE_PER_CPU(struct vcpu_runstate_info, runstate);
|
||||
|
||||
/* snapshots of runstate info */
|
||||
static DEFINE_PER_CPU(struct vcpu_runstate_info, runstate_snapshot);
|
||||
|
||||
/* unused ns of stolen and blocked time */
|
||||
static DEFINE_PER_CPU(u64, residual_stolen);
|
||||
static DEFINE_PER_CPU(u64, residual_blocked);
|
||||
|
||||
/* return a consistent snapshot of a 64-bit time/counter value */
|
||||
static u64 get64(const u64 *p)
|
||||
{
|
||||
u64 ret;
|
||||
|
||||
if (BITS_PER_LONG < 64) {
|
||||
u32 *p32 = (u32 *)p;
|
||||
u32 h, l;
|
||||
|
||||
/*
|
||||
* Read high then low, and then make sure high is
|
||||
* still the same; this will only loop if low wraps
|
||||
* and carries into high.
|
||||
* XXX some clean way to make this endian-proof?
|
||||
*/
|
||||
do {
|
||||
h = p32[1];
|
||||
barrier();
|
||||
l = p32[0];
|
||||
barrier();
|
||||
} while (p32[1] != h);
|
||||
|
||||
ret = (((u64)h) << 32) | l;
|
||||
} else
|
||||
ret = *p;
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Runstate accounting
|
||||
*/
|
||||
static void get_runstate_snapshot(struct vcpu_runstate_info *res)
|
||||
{
|
||||
u64 state_time;
|
||||
struct vcpu_runstate_info *state;
|
||||
|
||||
BUG_ON(preemptible());
|
||||
|
||||
state = &__get_cpu_var(runstate);
|
||||
|
||||
/*
|
||||
* The runstate info is always updated by the hypervisor on
|
||||
* the current CPU, so there's no need to use anything
|
||||
* stronger than a compiler barrier when fetching it.
|
||||
*/
|
||||
do {
|
||||
state_time = get64(&state->state_entry_time);
|
||||
barrier();
|
||||
*res = *state;
|
||||
barrier();
|
||||
} while (get64(&state->state_entry_time) != state_time);
|
||||
}
|
||||
|
||||
static void setup_runstate_info(int cpu)
|
||||
{
|
||||
struct vcpu_register_runstate_memory_area area;
|
||||
|
||||
area.addr.v = &per_cpu(runstate, cpu);
|
||||
|
||||
if (HYPERVISOR_vcpu_op(VCPUOP_register_runstate_memory_area,
|
||||
cpu, &area))
|
||||
BUG();
|
||||
}
|
||||
|
||||
static void do_stolen_accounting(void)
|
||||
{
|
||||
struct vcpu_runstate_info state;
|
||||
struct vcpu_runstate_info *snap;
|
||||
s64 blocked, runnable, offline, stolen;
|
||||
cputime_t ticks;
|
||||
|
||||
get_runstate_snapshot(&state);
|
||||
|
||||
WARN_ON(state.state != RUNSTATE_running);
|
||||
|
||||
snap = &__get_cpu_var(runstate_snapshot);
|
||||
|
||||
/* work out how much time the VCPU has not been runn*ing* */
|
||||
blocked = state.time[RUNSTATE_blocked] - snap->time[RUNSTATE_blocked];
|
||||
runnable = state.time[RUNSTATE_runnable] - snap->time[RUNSTATE_runnable];
|
||||
offline = state.time[RUNSTATE_offline] - snap->time[RUNSTATE_offline];
|
||||
|
||||
*snap = state;
|
||||
|
||||
/* Add the appropriate number of ticks of stolen time,
|
||||
including any left-overs from last time. Passing NULL to
|
||||
account_steal_time accounts the time as stolen. */
|
||||
stolen = runnable + offline + __get_cpu_var(residual_stolen);
|
||||
|
||||
if (stolen < 0)
|
||||
stolen = 0;
|
||||
|
||||
ticks = 0;
|
||||
while (stolen >= NS_PER_TICK) {
|
||||
ticks++;
|
||||
stolen -= NS_PER_TICK;
|
||||
}
|
||||
__get_cpu_var(residual_stolen) = stolen;
|
||||
account_steal_time(NULL, ticks);
|
||||
|
||||
/* Add the appropriate number of ticks of blocked time,
|
||||
including any left-overs from last time. Passing idle to
|
||||
account_steal_time accounts the time as idle/wait. */
|
||||
blocked += __get_cpu_var(residual_blocked);
|
||||
|
||||
if (blocked < 0)
|
||||
blocked = 0;
|
||||
|
||||
ticks = 0;
|
||||
while (blocked >= NS_PER_TICK) {
|
||||
ticks++;
|
||||
blocked -= NS_PER_TICK;
|
||||
}
|
||||
__get_cpu_var(residual_blocked) = blocked;
|
||||
account_steal_time(idle_task(smp_processor_id()), ticks);
|
||||
}
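
/*
 * Illustrative example (not part of the original patch; assumes HZ=250,
 * so NS_PER_TICK is 4,000,000ns): if a vcpu accumulated 9,500,000ns of
 * runnable+offline time plus a 1,000,000ns residual from the previous
 * pass, the loop above accounts 2 stolen ticks (8,000,000ns) and carries
 * the remaining 2,500,000ns forward in residual_stolen.
 */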
|
||||
|
||||
/*
|
||||
* Xen sched_clock implementation. Returns the number of unstolen
|
||||
* nanoseconds, which is nanoseconds the VCPU spent in RUNNING+BLOCKED
|
||||
* states.
|
||||
*/
|
||||
unsigned long long xen_sched_clock(void)
|
||||
{
|
||||
struct vcpu_runstate_info state;
|
||||
cycle_t now;
|
||||
u64 ret;
|
||||
s64 offset;
|
||||
|
||||
/*
|
||||
* Ideally sched_clock should be called on a per-cpu basis
|
||||
* anyway, so preempt should already be disabled, but that's
|
||||
* not current practice at the moment.
|
||||
*/
|
||||
preempt_disable();
|
||||
|
||||
now = xen_clocksource_read();
|
||||
|
||||
get_runstate_snapshot(&state);
|
||||
|
||||
WARN_ON(state.state != RUNSTATE_running);
|
||||
|
||||
offset = now - state.state_entry_time;
|
||||
if (offset < 0)
|
||||
offset = 0;
|
||||
|
||||
ret = state.time[RUNSTATE_blocked] +
|
||||
state.time[RUNSTATE_running] +
|
||||
offset;
|
||||
|
||||
preempt_enable();
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
/* Get the CPU speed from Xen */
|
||||
unsigned long xen_cpu_khz(void)
|
||||
{
|
||||
u64 cpu_khz = 1000000ULL << 32;
|
||||
const struct vcpu_time_info *info =
|
||||
&HYPERVISOR_shared_info->vcpu_info[0].time;
|
||||
|
||||
do_div(cpu_khz, info->tsc_to_system_mul);
|
||||
if (info->tsc_shift < 0)
|
||||
cpu_khz <<= -info->tsc_shift;
|
||||
else
|
||||
cpu_khz >>= info->tsc_shift;
|
||||
|
||||
return cpu_khz;
|
||||
}
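
/*
 * Illustrative derivation (not part of the original patch): Xen converts
 * TSC deltas to nanoseconds as roughly
 *
 *	ns = ((delta << tsc_shift) * tsc_to_system_mul) >> 32
 *
 * so tsc_to_system_mul ~= 10^9 * 2^32 / (tsc_hz * 2^tsc_shift).  Solving
 * for the frequency in kHz gives
 *
 *	cpu_khz = 10^6 * 2^32 / tsc_to_system_mul / 2^tsc_shift
 *
 * which is what the do_div() and the shift adjustment above compute.
 */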
|
||||
|
||||
/*
|
||||
* Reads a consistent set of time-base values from Xen, into a shadow data
|
||||
* area.
|
||||
*/
|
||||
static unsigned get_time_values_from_xen(void)
|
||||
{
|
||||
struct vcpu_time_info *src;
|
||||
struct shadow_time_info *dst;
|
||||
|
||||
/* src is shared memory with the hypervisor, so we need to
|
||||
make sure we get a consistent snapshot, even in the face of
|
||||
being preempted. */
|
||||
src = &__get_cpu_var(xen_vcpu)->time;
|
||||
dst = &__get_cpu_var(shadow_time);
|
||||
|
||||
do {
|
||||
dst->version = src->version;
|
||||
rmb(); /* fetch version before data */
|
||||
dst->tsc_timestamp = src->tsc_timestamp;
|
||||
dst->system_timestamp = src->system_time;
|
||||
dst->tsc_to_nsec_mul = src->tsc_to_system_mul;
|
||||
dst->tsc_shift = src->tsc_shift;
|
||||
rmb(); /* test version after fetching data */
|
||||
} while ((src->version & 1) | (dst->version ^ src->version));
|
||||
|
||||
return dst->version;
|
||||
}
|
||||
|
||||
/*
|
||||
* Scale a 64-bit delta by scaling and multiplying by a 32-bit fraction,
|
||||
* yielding a 64-bit result.
|
||||
*/
|
||||
static inline u64 scale_delta(u64 delta, u32 mul_frac, int shift)
|
||||
{
|
||||
u64 product;
|
||||
#ifdef __i386__
|
||||
u32 tmp1, tmp2;
|
||||
#endif
|
||||
|
||||
if (shift < 0)
|
||||
delta >>= -shift;
|
||||
else
|
||||
delta <<= shift;
|
||||
|
||||
#ifdef __i386__
|
||||
__asm__ (
|
||||
"mul %5 ; "
|
||||
"mov %4,%%eax ; "
|
||||
"mov %%edx,%4 ; "
|
||||
"mul %5 ; "
|
||||
"xor %5,%5 ; "
|
||||
"add %4,%%eax ; "
|
||||
"adc %5,%%edx ; "
|
||||
: "=A" (product), "=r" (tmp1), "=r" (tmp2)
|
||||
: "a" ((u32)delta), "1" ((u32)(delta >> 32)), "2" (mul_frac) );
|
||||
#elif __x86_64__
|
||||
__asm__ (
|
||||
"mul %%rdx ; shrd $32,%%rdx,%%rax"
|
||||
: "=a" (product) : "0" (delta), "d" ((u64)mul_frac) );
|
||||
#else
|
||||
#error implement me!
|
||||
#endif
|
||||
|
||||
return product;
|
||||
}
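
/*
 * Illustrative note (not part of the original patch): on a compiler with
 * a 64x64->128-bit multiply (e.g. GCC's unsigned __int128), the function
 * above is equivalent to
 *
 *	product = (u64)(((unsigned __int128)delta * mul_frac) >> 32);
 *
 * the i386 asm builds this out of two 32x32->64 multiplies by hand, and
 * the x86_64 asm uses mul followed by shrd.
 */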
|
||||
|
||||
static u64 get_nsec_offset(struct shadow_time_info *shadow)
|
||||
{
|
||||
u64 now, delta;
|
||||
now = native_read_tsc();
|
||||
delta = now - shadow->tsc_timestamp;
|
||||
return scale_delta(delta, shadow->tsc_to_nsec_mul, shadow->tsc_shift);
|
||||
}
|
||||
|
||||
static cycle_t xen_clocksource_read(void)
|
||||
{
|
||||
struct shadow_time_info *shadow = &get_cpu_var(shadow_time);
|
||||
cycle_t ret;
|
||||
unsigned version;
|
||||
|
||||
do {
|
||||
version = get_time_values_from_xen();
|
||||
barrier();
|
||||
ret = shadow->system_timestamp + get_nsec_offset(shadow);
|
||||
barrier();
|
||||
} while (version != __get_cpu_var(xen_vcpu)->time.version);
|
||||
|
||||
put_cpu_var(shadow_time);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void xen_read_wallclock(struct timespec *ts)
|
||||
{
|
||||
const struct shared_info *s = HYPERVISOR_shared_info;
|
||||
u32 version;
|
||||
u64 delta;
|
||||
struct timespec now;
|
||||
|
||||
/* get wallclock at system boot */
|
||||
do {
|
||||
version = s->wc_version;
|
||||
rmb(); /* fetch version before time */
|
||||
now.tv_sec = s->wc_sec;
|
||||
now.tv_nsec = s->wc_nsec;
|
||||
rmb(); /* fetch time before checking version */
|
||||
} while ((s->wc_version & 1) | (version ^ s->wc_version));
|
||||
|
||||
delta = xen_clocksource_read(); /* time since system boot */
|
||||
delta += now.tv_sec * (u64)NSEC_PER_SEC + now.tv_nsec;
|
||||
|
||||
now.tv_nsec = do_div(delta, NSEC_PER_SEC);
|
||||
now.tv_sec = delta;
|
||||
|
||||
set_normalized_timespec(ts, now.tv_sec, now.tv_nsec);
|
||||
}
|
||||
|
||||
unsigned long xen_get_wallclock(void)
|
||||
{
|
||||
struct timespec ts;
|
||||
|
||||
xen_read_wallclock(&ts);
|
||||
|
||||
return ts.tv_sec;
|
||||
}
|
||||
|
||||
int xen_set_wallclock(unsigned long now)
|
||||
{
|
||||
/* do nothing for domU */
|
||||
return -1;
|
||||
}
|
||||
|
||||
static struct clocksource xen_clocksource __read_mostly = {
|
||||
.name = "xen",
|
||||
.rating = 400,
|
||||
.read = xen_clocksource_read,
|
||||
.mask = ~0,
|
||||
.mult = 1<<XEN_SHIFT, /* time directly in nanoseconds */
|
||||
.shift = XEN_SHIFT,
|
||||
.flags = CLOCK_SOURCE_IS_CONTINUOUS,
|
||||
};
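
/*
 * Illustrative note (not part of the original patch): the clocksource
 * core converts cycles to nanoseconds as (cycles * mult) >> shift, so
 * mult = 1 << XEN_SHIFT together with shift = XEN_SHIFT makes that
 * conversion the identity, since xen_clocksource_read() already returns
 * nanoseconds.
 */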
|
||||
|
||||
/*
|
||||
Xen clockevent implementation
|
||||
|
||||
Xen has two clockevent implementations:
|
||||
|
||||
The old timer_op one works with all released versions of Xen prior
|
||||
to version 3.0.4. This version of the hypervisor provides a
|
||||
single-shot timer with nanosecond resolution. However, sharing the
|
||||
same event channel is a 100Hz tick which is delivered while the
|
||||
vcpu is running. We don't care about or use this tick, but it will
|
||||
cause the core time code to think the timer fired too soon, and
|
||||
will end up resetting it each time. It could be filtered, but
|
||||
doing so has complications when the ktime clocksource is not yet
|
||||
the xen clocksource (ie, at boot time).
|
||||
|
||||
The new vcpu_op-based timer interface allows the tick timer period
|
||||
to be changed or turned off. The tick timer is not useful as a
|
||||
periodic timer because events are only delivered to running vcpus.
|
||||
The one-shot timer can report when a timeout is in the past, so
|
||||
set_next_event is capable of returning -ETIME when appropriate.
|
||||
This interface is used when available.
|
||||
*/
|
||||
|
||||
|
||||
/*
|
||||
Get a hypervisor absolute time. In theory we could maintain an
|
||||
offset between the kernel's time and the hypervisor's time, and
|
||||
apply that to the kernel's absolute timeout. Unfortunately the
|
||||
hypervisor and kernel times can drift even if the kernel is using
|
||||
the Xen clocksource, because ntp can warp the kernel's clocksource.
|
||||
*/
|
||||
static s64 get_abs_timeout(unsigned long delta)
|
||||
{
|
||||
return xen_clocksource_read() + delta;
|
||||
}
|
||||
|
||||
static void xen_timerop_set_mode(enum clock_event_mode mode,
|
||||
struct clock_event_device *evt)
|
||||
{
|
||||
switch (mode) {
|
||||
case CLOCK_EVT_MODE_PERIODIC:
|
||||
/* unsupported */
|
||||
WARN_ON(1);
|
||||
break;
|
||||
|
||||
case CLOCK_EVT_MODE_ONESHOT:
|
||||
break;
|
||||
|
||||
case CLOCK_EVT_MODE_UNUSED:
|
||||
case CLOCK_EVT_MODE_SHUTDOWN:
|
||||
HYPERVISOR_set_timer_op(0); /* cancel timeout */
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
static int xen_timerop_set_next_event(unsigned long delta,
|
||||
struct clock_event_device *evt)
|
||||
{
|
||||
WARN_ON(evt->mode != CLOCK_EVT_MODE_ONESHOT);
|
||||
|
||||
if (HYPERVISOR_set_timer_op(get_abs_timeout(delta)) < 0)
|
||||
BUG();
|
||||
|
||||
/* We may have missed the deadline, but there's no real way of
|
||||
knowing for sure. If the event was in the past, then we'll
|
||||
get an immediate interrupt. */
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static const struct clock_event_device xen_timerop_clockevent = {
|
||||
.name = "xen",
|
||||
.features = CLOCK_EVT_FEAT_ONESHOT,
|
||||
|
||||
.max_delta_ns = 0xffffffff,
|
||||
.min_delta_ns = TIMER_SLOP,
|
||||
|
||||
.mult = 1,
|
||||
.shift = 0,
|
||||
.rating = 500,
|
||||
|
||||
.set_mode = xen_timerop_set_mode,
|
||||
.set_next_event = xen_timerop_set_next_event,
|
||||
};
|
||||
|
||||
|
||||
|
||||
static void xen_vcpuop_set_mode(enum clock_event_mode mode,
|
||||
struct clock_event_device *evt)
|
||||
{
|
||||
int cpu = smp_processor_id();
|
||||
|
||||
switch (mode) {
|
||||
case CLOCK_EVT_MODE_PERIODIC:
|
||||
WARN_ON(1); /* unsupported */
|
||||
break;
|
||||
|
||||
case CLOCK_EVT_MODE_ONESHOT:
|
||||
if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, cpu, NULL))
|
||||
BUG();
|
||||
break;
|
||||
|
||||
case CLOCK_EVT_MODE_UNUSED:
|
||||
case CLOCK_EVT_MODE_SHUTDOWN:
|
||||
if (HYPERVISOR_vcpu_op(VCPUOP_stop_singleshot_timer, cpu, NULL) ||
|
||||
HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, cpu, NULL))
|
||||
BUG();
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
static int xen_vcpuop_set_next_event(unsigned long delta,
|
||||
struct clock_event_device *evt)
|
||||
{
|
||||
int cpu = smp_processor_id();
|
||||
struct vcpu_set_singleshot_timer single;
|
||||
int ret;
|
||||
|
||||
WARN_ON(evt->mode != CLOCK_EVT_MODE_ONESHOT);
|
||||
|
||||
single.timeout_abs_ns = get_abs_timeout(delta);
|
||||
single.flags = VCPU_SSHOTTMR_future;
|
||||
|
||||
ret = HYPERVISOR_vcpu_op(VCPUOP_set_singleshot_timer, cpu, &single);
|
||||
|
||||
BUG_ON(ret != 0 && ret != -ETIME);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static const struct clock_event_device xen_vcpuop_clockevent = {
|
||||
.name = "xen",
|
||||
.features = CLOCK_EVT_FEAT_ONESHOT,
|
||||
|
||||
.max_delta_ns = 0xffffffff,
|
||||
.min_delta_ns = TIMER_SLOP,
|
||||
|
||||
.mult = 1,
|
||||
.shift = 0,
|
||||
.rating = 500,
|
||||
|
||||
.set_mode = xen_vcpuop_set_mode,
|
||||
.set_next_event = xen_vcpuop_set_next_event,
|
||||
};
|
||||
|
||||
static const struct clock_event_device *xen_clockevent =
|
||||
&xen_timerop_clockevent;
|
||||
static DEFINE_PER_CPU(struct clock_event_device, xen_clock_events);
|
||||
|
||||
static irqreturn_t xen_timer_interrupt(int irq, void *dev_id)
|
||||
{
|
||||
struct clock_event_device *evt = &__get_cpu_var(xen_clock_events);
|
||||
irqreturn_t ret;
|
||||
|
||||
ret = IRQ_NONE;
|
||||
if (evt->event_handler) {
|
||||
evt->event_handler(evt);
|
||||
ret = IRQ_HANDLED;
|
||||
}
|
||||
|
||||
do_stolen_accounting();
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
void xen_setup_timer(int cpu)
|
||||
{
|
||||
const char *name;
|
||||
struct clock_event_device *evt;
|
||||
int irq;
|
||||
|
||||
printk(KERN_INFO "installing Xen timer for CPU %d\n", cpu);
|
||||
|
||||
name = kasprintf(GFP_KERNEL, "timer%d", cpu);
|
||||
if (!name)
|
||||
name = "<timer kasprintf failed>";
|
||||
|
||||
irq = bind_virq_to_irqhandler(VIRQ_TIMER, cpu, xen_timer_interrupt,
|
||||
IRQF_DISABLED|IRQF_PERCPU|IRQF_NOBALANCING,
|
||||
name, NULL);
|
||||
|
||||
evt = &per_cpu(xen_clock_events, cpu);
|
||||
memcpy(evt, xen_clockevent, sizeof(*evt));
|
||||
|
||||
evt->cpumask = cpumask_of_cpu(cpu);
|
||||
evt->irq = irq;
|
||||
|
||||
setup_runstate_info(cpu);
|
||||
}
|
||||
|
||||
void xen_setup_cpu_clockevents(void)
|
||||
{
|
||||
BUG_ON(preemptible());
|
||||
|
||||
clockevents_register_device(&__get_cpu_var(xen_clock_events));
|
||||
}
|
||||
|
||||
__init void xen_time_init(void)
|
||||
{
|
||||
int cpu = smp_processor_id();
|
||||
|
||||
get_time_values_from_xen();
|
||||
|
||||
clocksource_register(&xen_clocksource);
|
||||
|
||||
if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, cpu, NULL) == 0) {
|
||||
/* Successfully turned off 100Hz tick, so we have the
|
||||
vcpuop-based timer interface */
|
||||
printk(KERN_DEBUG "Xen: using vcpuop timer interface\n");
|
||||
xen_clockevent = &xen_vcpuop_clockevent;
|
||||
}
|
||||
|
||||
/* Set initial system time with full resolution */
|
||||
xen_read_wallclock(&xtime);
|
||||
set_normalized_timespec(&wall_to_monotonic,
|
||||
-xtime.tv_sec, -xtime.tv_nsec);
|
||||
|
||||
tsc_disable = 0;
|
||||
|
||||
xen_setup_timer(cpu);
|
||||
xen_setup_cpu_clockevents();
|
||||
}
|
|
@ -0,0 +1,291 @@
|
|||
/*
|
||||
Asm versions of Xen pv-ops, suitable for either direct use or inlining.
|
||||
The inline versions are the same as the direct-use versions, with the
|
||||
pre- and post-amble chopped off.
|
||||
|
||||
This code is encoded for size rather than absolute efficiency,
|
||||
with a view to being able to inline as much as possible.
|
||||
|
||||
We only bother with direct forms (ie, vcpu in pda) of the operations
|
||||
here; the indirect forms are better handled in C, since they're
|
||||
generally too large to inline anyway.
|
||||
*/
|
||||
|
||||
#include <linux/linkage.h>
|
||||
|
||||
#include <asm/asm-offsets.h>
|
||||
#include <asm/thread_info.h>
|
||||
#include <asm/percpu.h>
|
||||
#include <asm/processor-flags.h>
|
||||
#include <asm/segment.h>
|
||||
|
||||
#include <xen/interface/xen.h>
|
||||
|
||||
#define RELOC(x, v) .globl x##_reloc; x##_reloc=v
|
||||
#define ENDPATCH(x) .globl x##_end; x##_end=.
|
||||
|
||||
/* Pseudo-flag used for virtual NMI, which we don't implement yet */
|
||||
#define XEN_EFLAGS_NMI 0x80000000
|
||||
|
||||
/*
|
||||
Enable events. This clears the event mask and tests the pending
|
||||
event status with a single 'and' operation. If there are pending
|
||||
events, then enter the hypervisor to get them handled.
|
||||
*/
|
||||
ENTRY(xen_irq_enable_direct)
|
||||
/* Clear mask and test pending */
|
||||
andw $0x00ff, PER_CPU_VAR(xen_vcpu_info)+XEN_vcpu_info_pending
|
||||
/* Preempt here doesn't matter because that will deal with
|
||||
any pending interrupts. The pending check may end up being
|
||||
run on the wrong CPU, but that doesn't hurt. */
|
||||
jz 1f
|
||||
2: call check_events
|
||||
1:
|
||||
ENDPATCH(xen_irq_enable_direct)
|
||||
ret
|
||||
ENDPROC(xen_irq_enable_direct)
|
||||
RELOC(xen_irq_enable_direct, 2b+1)
|
||||
|
||||
|
||||
/*
|
||||
Disabling events is simply a matter of making the event mask
|
||||
non-zero.
|
||||
*/
|
||||
ENTRY(xen_irq_disable_direct)
|
||||
movb $1, PER_CPU_VAR(xen_vcpu_info)+XEN_vcpu_info_mask
|
||||
ENDPATCH(xen_irq_disable_direct)
|
||||
ret
|
||||
ENDPROC(xen_irq_disable_direct)
|
||||
RELOC(xen_irq_disable_direct, 0)
|
||||
|
||||
/*
|
||||
(xen_)save_fl is used to get the current interrupt enable status.
|
||||
Callers expect the status to be in X86_EFLAGS_IF, and other bits
|
||||
may be set in the return value. We take advantage of this by
|
||||
making sure that X86_EFLAGS_IF has the right value (and other bits
|
||||
in that byte are 0), but other bits in the return value are
|
||||
undefined. We need to toggle the state of the bit, because
|
||||
Xen and x86 use opposite senses (mask vs enable).
|
||||
*/
|
||||
ENTRY(xen_save_fl_direct)
|
||||
testb $0xff, PER_CPU_VAR(xen_vcpu_info)+XEN_vcpu_info_mask
|
||||
setz %ah
|
||||
addb %ah,%ah
|
||||
ENDPATCH(xen_save_fl_direct)
|
||||
ret
|
||||
ENDPROC(xen_save_fl_direct)
|
||||
RELOC(xen_save_fl_direct, 0)
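
/*
 * Illustrative note (not part of the original patch): the three
 * instructions above implement the mask -> IF inversion as
 *
 *	evtchn_upcall_mask == 0 (events enabled):  setz gives 1, %ah = 2,
 *						    so X86_EFLAGS_IF is set
 *	evtchn_upcall_mask != 0 (events disabled): setz gives 0, %ah = 0,
 *						    so X86_EFLAGS_IF is clear
 *
 * since bit 1 of %ah is bit 9 of %eax, the IF position.
 */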
|
||||
|
||||
|
||||
/*
|
||||
In principle the caller should be passing us a value return
|
||||
from xen_save_fl_direct, but for robustness sake we test only
|
||||
the X86_EFLAGS_IF flag rather than the whole byte. After
|
||||
setting the interrupt mask state, it checks for unmasked
|
||||
pending events and enters the hypervisor to get them delivered
|
||||
if so.
|
||||
*/
|
||||
ENTRY(xen_restore_fl_direct)
|
||||
testb $X86_EFLAGS_IF>>8, %ah
|
||||
setz PER_CPU_VAR(xen_vcpu_info)+XEN_vcpu_info_mask
|
||||
/* Preempt here doesn't matter because that will deal with
|
||||
any pending interrupts. The pending check may end up being
|
||||
run on the wrong CPU, but that doesn't hurt. */
|
||||
|
||||
/* check for unmasked and pending */
|
||||
cmpw $0x0001, PER_CPU_VAR(xen_vcpu_info)+XEN_vcpu_info_pending
|
||||
jz 1f
|
||||
2: call check_events
|
||||
1:
|
||||
ENDPATCH(xen_restore_fl_direct)
|
||||
ret
|
||||
ENDPROC(xen_restore_fl_direct)
|
||||
RELOC(xen_restore_fl_direct, 2b+1)
|
||||
|
||||
/*
|
||||
This is run where a normal iret would be run, with the same stack setup:
|
||||
8: eflags
|
||||
4: cs
|
||||
esp-> 0: eip
|
||||
|
||||
This attempts to make sure that any pending events are dealt
|
||||
with on return to usermode, but there is a small window in
|
||||
which an event can happen just before entering usermode. If
|
||||
the nested interrupt ends up setting one of the TIF_WORK_MASK
|
||||
pending work flags, they will not be tested again before
|
||||
returning to usermode. This means that a process can end up
|
||||
with pending work, which will be unprocessed until the process
|
||||
enters and leaves the kernel again, which could be an
|
||||
unbounded amount of time. This means that a pending signal or
|
||||
reschedule event could be indefinitely delayed.
|
||||
|
||||
The fix is to notice a nested interrupt in the critical
|
||||
window, and if one occurs, then fold the nested interrupt into
|
||||
the current interrupt stack frame, and re-process it
|
||||
iteratively rather than recursively. This means that it will
|
||||
exit via the normal path, and all pending work will be dealt
|
||||
with appropriately.
|
||||
|
||||
Because the nested interrupt handler needs to deal with the
|
||||
current stack state in whatever form it's in, we keep things
|
||||
simple by only using a single register which is pushed/popped
|
||||
on the stack.
|
||||
|
||||
Non-direct iret could be done in the same way, but it would
|
||||
require an annoying amount of code duplication. We'll assume
|
||||
that direct mode will be the common case once the hypervisor
|
||||
support becomes commonplace.
|
||||
*/
|
||||
ENTRY(xen_iret_direct)
|
||||
/* test eflags for special cases */
|
||||
testl $(X86_EFLAGS_VM | XEN_EFLAGS_NMI), 8(%esp)
|
||||
jnz hyper_iret
|
||||
|
||||
push %eax
|
||||
ESP_OFFSET=4 # bytes pushed onto stack
|
||||
|
||||
/* Store vcpu_info pointer for easy access. Do it this
|
||||
way to avoid having to reload %fs */
|
||||
#ifdef CONFIG_SMP
|
||||
GET_THREAD_INFO(%eax)
|
||||
movl TI_cpu(%eax),%eax
|
||||
movl __per_cpu_offset(,%eax,4),%eax
|
||||
lea per_cpu__xen_vcpu_info(%eax),%eax
|
||||
#else
|
||||
movl $per_cpu__xen_vcpu_info, %eax
|
||||
#endif
|
||||
|
||||
/* check IF state we're restoring */
|
||||
testb $X86_EFLAGS_IF>>8, 8+1+ESP_OFFSET(%esp)
|
||||
|
||||
/* Maybe enable events. Once this happens we could get a
|
||||
recursive event, so the critical region starts immediately
|
||||
afterwards. However, if that happens we don't end up
|
||||
resuming the code, so we don't have to be worried about
|
||||
being preempted to another CPU. */
|
||||
setz XEN_vcpu_info_mask(%eax)
|
||||
xen_iret_start_crit:
|
||||
|
||||
/* check for unmasked and pending */
|
||||
cmpw $0x0001, XEN_vcpu_info_pending(%eax)
|
||||
|
||||
/* If there's something pending, mask events again so we
|
||||
can jump back into xen_hypervisor_callback */
|
||||
sete XEN_vcpu_info_mask(%eax)
|
||||
|
||||
popl %eax
|
||||
|
||||
/* From this point on the registers are restored and the stack
|
||||
updated, so we don't need to worry about it if we're preempted */
|
||||
iret_restore_end:
|
||||
|
||||
/* Jump to hypervisor_callback after fixing up the stack.
|
||||
Events are masked, so jumping out of the critical
|
||||
region is OK. */
|
||||
je xen_hypervisor_callback
|
||||
|
||||
iret
|
||||
xen_iret_end_crit:
|
||||
|
||||
hyper_iret:
|
||||
/* put this out of line since it's very rarely used */
|
||||
jmp hypercall_page + __HYPERVISOR_iret * 32
|
||||
|
||||
.globl xen_iret_start_crit, xen_iret_end_crit
|
||||
|
||||
/*
|
||||
This is called by xen_hypervisor_callback in entry.S when it sees
|
||||
that the EIP at the time of interrupt was between xen_iret_start_crit
|
||||
and xen_iret_end_crit. We're passed the EIP in %eax so we can do
|
||||
a more refined determination of what to do.
|
||||
|
||||
The stack format at this point is:
|
||||
----------------
|
||||
ss : (ss/esp may be present if we came from usermode)
|
||||
esp :
|
||||
eflags } outer exception info
|
||||
cs }
|
||||
eip }
|
||||
---------------- <- edi (copy dest)
|
||||
eax : outer eax if it hasn't been restored
|
||||
----------------
|
||||
eflags } nested exception info
|
||||
cs } (no ss/esp because we're nested
|
||||
eip } from the same ring)
|
||||
orig_eax }<- esi (copy src)
|
||||
- - - - - - - -
|
||||
fs }
|
||||
es }
|
||||
ds } SAVE_ALL state
|
||||
eax }
|
||||
: :
|
||||
ebx }
|
||||
----------------
|
||||
return addr <- esp
|
||||
----------------
|
||||
|
||||
In order to deliver the nested exception properly, we need to shift
|
||||
everything from the return addr up to the error code so it
|
||||
sits just under the outer exception info. This means that when we
|
||||
handle the exception, we do it in the context of the outer exception
|
||||
rather than starting a new one.
|
||||
|
||||
The only caveat is that if the outer eax hasn't been
|
||||
restored yet (ie, it's still on stack), we need to insert
|
||||
its value into the SAVE_ALL state before going on, since
|
||||
it's usermode state which we eventually need to restore.
|
||||
*/
|
||||
ENTRY(xen_iret_crit_fixup)
|
||||
/* offsets +4 for return address */
|
||||
|
||||
/*
|
||||
Paranoia: Make sure we're really coming from userspace.
|
||||
One could imagine a case where userspace jumps into the
|
||||
critical range address, but just before the CPU delivers a GP,
|
||||
it decides to deliver an interrupt instead. Unlikely?
|
||||
Definitely. Easy to avoid? Yes. The Intel documents
|
||||
explicitly say that the reported EIP for a bad jump is the
|
||||
jump instruction itself, not the destination, but some virtual
|
||||
environments get this wrong.
|
||||
*/
|
||||
movl PT_CS+4(%esp), %ecx
|
||||
andl $SEGMENT_RPL_MASK, %ecx
|
||||
cmpl $USER_RPL, %ecx
|
||||
je 2f
|
||||
|
||||
lea PT_ORIG_EAX+4(%esp), %esi
|
||||
lea PT_EFLAGS+4(%esp), %edi
|
||||
|
||||
/* If eip is before iret_restore_end then stack
|
||||
hasn't been restored yet. */
|
||||
cmp $iret_restore_end, %eax
|
||||
jae 1f
|
||||
|
||||
movl 0+4(%edi),%eax /* copy EAX */
|
||||
movl %eax, PT_EAX+4(%esp)
|
||||
|
||||
lea ESP_OFFSET(%edi),%edi /* move dest up over saved regs */
|
||||
|
||||
/* set up the copy */
|
||||
1: std
|
||||
mov $(PT_EIP+4) / 4, %ecx /* copy ret+saved regs up to orig_eax */
|
||||
rep movsl
|
||||
cld
|
||||
|
||||
lea 4(%edi),%esp /* point esp to new frame */
|
||||
2: ret
|
||||
|
||||
|
||||
/*
|
||||
Force an event check by making a hypercall,
|
||||
but preserve regs before making the call.
|
||||
*/
|
||||
check_events:
|
||||
push %eax
|
||||
push %ecx
|
||||
push %edx
|
||||
call force_evtchn_callback
|
||||
pop %edx
|
||||
pop %ecx
|
||||
pop %eax
|
||||
ret
|
|
@ -0,0 +1,36 @@
|
|||
/* Xen-specific pieces of head.S, intended to be included in the right
|
||||
place in head.S */
|
||||
|
||||
#ifdef CONFIG_XEN
|
||||
|
||||
#include <linux/elfnote.h>
|
||||
#include <asm/boot.h>
|
||||
#include <xen/interface/elfnote.h>
|
||||
|
||||
ENTRY(startup_xen)
|
||||
movl %esi,xen_start_info
|
||||
cld
|
||||
movl $(init_thread_union+THREAD_SIZE),%esp
|
||||
jmp xen_start_kernel
|
||||
|
||||
.pushsection ".bss.page_aligned"
|
||||
.align PAGE_SIZE_asm
|
||||
ENTRY(hypercall_page)
|
||||
.skip 0x1000
|
||||
.popsection
|
||||
|
||||
ELFNOTE(Xen, XEN_ELFNOTE_GUEST_OS, .asciz "linux")
|
||||
ELFNOTE(Xen, XEN_ELFNOTE_GUEST_VERSION, .asciz "2.6")
|
||||
ELFNOTE(Xen, XEN_ELFNOTE_XEN_VERSION, .asciz "xen-3.0")
|
||||
ELFNOTE(Xen, XEN_ELFNOTE_VIRT_BASE, .long __PAGE_OFFSET)
|
||||
ELFNOTE(Xen, XEN_ELFNOTE_ENTRY, .long startup_xen)
|
||||
ELFNOTE(Xen, XEN_ELFNOTE_HYPERCALL_PAGE, .long hypercall_page)
|
||||
ELFNOTE(Xen, XEN_ELFNOTE_FEATURES, .asciz "!writable_page_tables|pae_pgdir_above_4gb")
|
||||
#ifdef CONFIG_X86_PAE
|
||||
ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE, .asciz "yes")
|
||||
#else
|
||||
ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE, .asciz "no")
|
||||
#endif
|
||||
ELFNOTE(Xen, XEN_ELFNOTE_LOADER, .asciz "generic")
|
||||
|
||||
#endif /*CONFIG_XEN */
|
|
@ -0,0 +1,71 @@
|
|||
#ifndef XEN_OPS_H
|
||||
#define XEN_OPS_H
|
||||
|
||||
#include <linux/init.h>
|
||||
|
||||
/* These are code, but not functions. Defined in entry.S */
|
||||
extern const char xen_hypervisor_callback[];
|
||||
extern const char xen_failsafe_callback[];
|
||||
|
||||
void xen_copy_trap_info(struct trap_info *traps);
|
||||
|
||||
DECLARE_PER_CPU(struct vcpu_info *, xen_vcpu);
|
||||
DECLARE_PER_CPU(unsigned long, xen_cr3);
|
||||
|
||||
extern struct start_info *xen_start_info;
|
||||
extern struct shared_info *HYPERVISOR_shared_info;
|
||||
|
||||
char * __init xen_memory_setup(void);
|
||||
void __init xen_arch_setup(void);
|
||||
void __init xen_init_IRQ(void);
|
||||
|
||||
void xen_setup_timer(int cpu);
|
||||
void xen_setup_cpu_clockevents(void);
|
||||
unsigned long xen_cpu_khz(void);
|
||||
void __init xen_time_init(void);
|
||||
unsigned long xen_get_wallclock(void);
|
||||
int xen_set_wallclock(unsigned long time);
|
||||
unsigned long long xen_sched_clock(void);
|
||||
|
||||
void xen_mark_init_mm_pinned(void);
|
||||
|
||||
DECLARE_PER_CPU(enum paravirt_lazy_mode, xen_lazy_mode);
|
||||
|
||||
static inline unsigned xen_get_lazy_mode(void)
|
||||
{
|
||||
return x86_read_percpu(xen_lazy_mode);
|
||||
}
|
||||
|
||||
void __init xen_fill_possible_map(void);
|
||||
|
||||
void __init xen_setup_vcpu_info_placement(void);
|
||||
void xen_smp_prepare_boot_cpu(void);
|
||||
void xen_smp_prepare_cpus(unsigned int max_cpus);
|
||||
int xen_cpu_up(unsigned int cpu);
|
||||
void xen_smp_cpus_done(unsigned int max_cpus);
|
||||
|
||||
void xen_smp_send_stop(void);
|
||||
void xen_smp_send_reschedule(int cpu);
|
||||
int xen_smp_call_function (void (*func) (void *info), void *info, int nonatomic,
|
||||
int wait);
|
||||
int xen_smp_call_function_single(int cpu, void (*func) (void *info), void *info,
|
||||
int nonatomic, int wait);
|
||||
|
||||
int xen_smp_call_function_mask(cpumask_t mask, void (*func)(void *),
|
||||
void *info, int wait);
|
||||
|
||||
|
||||
/* Declare an asm function, along with symbols needed to make it
|
||||
inlineable */
|
||||
#define DECL_ASM(ret, name, ...) \
|
||||
ret name(__VA_ARGS__); \
|
||||
extern char name##_end[]; \
|
||||
extern char name##_reloc[] \
|
||||
|
||||
DECL_ASM(void, xen_irq_enable_direct, void);
|
||||
DECL_ASM(void, xen_irq_disable_direct, void);
|
||||
DECL_ASM(unsigned long, xen_save_fl_direct, void);
|
||||
DECL_ASM(void, xen_restore_fl_direct, unsigned long);
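
/*
 * Illustrative expansion (not part of the original patch):
 * DECL_ASM(void, xen_irq_disable_direct, void) expands to
 *
 *	void xen_irq_disable_direct(void);
 *	extern char xen_irq_disable_direct_end[];
 *	extern char xen_irq_disable_direct_reloc[]
 *
 * with the final semicolon supplied by the ';' at the use site above.
 */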
|
||||
|
||||
void xen_iret_direct(void);
|
||||
#endif /* XEN_OPS_H */
|
|
@ -6,6 +6,7 @@
|
|||
#include <asm/io.h>
|
||||
#include <asm/processor.h>
|
||||
#include <asm/fcntl.h>
|
||||
#include <xen/hvc-console.h>
|
||||
|
||||
/* Simple VGA output */
|
||||
|
||||
|
@ -242,6 +243,10 @@ static int __init setup_early_printk(char *buf)
|
|||
simnow_init(buf + 6);
|
||||
early_console = &simnow_console;
|
||||
keep_early = 1;
|
||||
#ifdef CONFIG_HVC_XEN
|
||||
} else if (!strncmp(buf, "xen", 3)) {
|
||||
early_console = &xenboot_console;
|
||||
#endif
|
||||
}
|
||||
|
||||
if (keep_early)
|
||||
|
|
|
@ -174,7 +174,7 @@ static void do_mce_trigger(void)
|
|||
if (events != atomic_read(&mce_logged) && trigger[0]) {
|
||||
/* Small race window, but should be harmless. */
|
||||
atomic_set(&mce_logged, events);
|
||||
call_usermodehelper(trigger, trigger_argv, NULL, -1);
|
||||
call_usermodehelper(trigger, trigger_argv, NULL, UMH_NO_WAIT);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -15,6 +15,8 @@ obj-$(CONFIG_ACPI) += acpi/
|
|||
obj-$(CONFIG_PNP) += pnp/
|
||||
obj-$(CONFIG_ARM_AMBA) += amba/
|
||||
|
||||
obj-$(CONFIG_XEN) += xen/
|
||||
|
||||
# char/ comes before serial/ etc so that the VT console is the boot-time
|
||||
# default.
|
||||
obj-y += char/
|
||||
|
|
|
@ -40,6 +40,7 @@
|
|||
#include <linux/jiffies.h>
|
||||
#include <linux/kmod.h>
|
||||
#include <linux/seq_file.h>
|
||||
#include <linux/reboot.h>
|
||||
#include <asm/uaccess.h>
|
||||
|
||||
#include <acpi/acpi_bus.h>
|
||||
|
@ -59,7 +60,6 @@
|
|||
#define ACPI_THERMAL_NOTIFY_CRITICAL 0xF0
|
||||
#define ACPI_THERMAL_NOTIFY_HOT 0xF1
|
||||
#define ACPI_THERMAL_MODE_ACTIVE 0x00
|
||||
#define ACPI_THERMAL_PATH_POWEROFF "/sbin/poweroff"
|
||||
|
||||
#define ACPI_THERMAL_MAX_ACTIVE 10
|
||||
#define ACPI_THERMAL_MAX_LIMIT_STR_LEN 65
|
||||
|
@ -419,26 +419,6 @@ static int acpi_thermal_get_devices(struct acpi_thermal *tz)
|
|||
return 0;
|
||||
}
|
||||
|
||||
static int acpi_thermal_call_usermode(char *path)
|
||||
{
|
||||
char *argv[2] = { NULL, NULL };
|
||||
char *envp[3] = { NULL, NULL, NULL };
|
||||
|
||||
|
||||
if (!path)
|
||||
return -EINVAL;
|
||||
|
||||
argv[0] = path;
|
||||
|
||||
/* minimal command environment */
|
||||
envp[0] = "HOME=/";
|
||||
envp[1] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin";
|
||||
|
||||
call_usermodehelper(argv[0], argv, envp, 0);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int acpi_thermal_critical(struct acpi_thermal *tz)
|
||||
{
|
||||
if (!tz || !tz->trips.critical.flags.valid)
|
||||
|
@ -456,7 +436,7 @@ static int acpi_thermal_critical(struct acpi_thermal *tz)
|
|||
acpi_bus_generate_event(tz->device, ACPI_THERMAL_NOTIFY_CRITICAL,
|
||||
tz->trips.critical.flags.enabled);
|
||||
|
||||
acpi_thermal_call_usermode(ACPI_THERMAL_PATH_POWEROFF);
|
||||
orderly_poweroff(true);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -427,4 +427,13 @@ config XILINX_SYSACE
|
|||
help
|
||||
Include support for the Xilinx SystemACE CompactFlash interface
|
||||
|
||||
config XEN_BLKDEV_FRONTEND
|
||||
tristate "Xen virtual block device support"
|
||||
depends on XEN
|
||||
default y
|
||||
help
|
||||
This driver implements the front-end of the Xen virtual
|
||||
block device driver. It communicates with a back-end driver
|
||||
in another domain which drives the actual block device.
|
||||
|
||||
endif # BLK_DEV
|
||||
|
|
|
@ -29,3 +29,4 @@ obj-$(CONFIG_VIODASD) += viodasd.o
|
|||
obj-$(CONFIG_BLK_DEV_SX8) += sx8.o
|
||||
obj-$(CONFIG_BLK_DEV_UB) += ub.o
|
||||
|
||||
obj-$(CONFIG_XEN_BLKDEV_FRONTEND) += xen-blkfront.o
|
||||
|
|
|
@ -0,0 +1,988 @@
|
|||
/*
|
||||
* blkfront.c
|
||||
*
|
||||
* XenLinux virtual block device driver.
|
||||
*
|
||||
* Copyright (c) 2003-2004, Keir Fraser & Steve Hand
|
||||
* Modifications by Mark A. Williamson are (c) Intel Research Cambridge
|
||||
* Copyright (c) 2004, Christian Limpach
|
||||
* Copyright (c) 2004, Andrew Warfield
|
||||
* Copyright (c) 2005, Christopher Clark
|
||||
* Copyright (c) 2005, XenSource Ltd
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License version 2
|
||||
* as published by the Free Software Foundation; or, when distributed
|
||||
* separately from the Linux kernel or incorporated into other
|
||||
* software packages, subject to the following license:
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this source file (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use, copy, modify,
|
||||
* merge, publish, distribute, sublicense, and/or sell copies of the Software,
|
||||
* and to permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <linux/interrupt.h>
|
||||
#include <linux/blkdev.h>
|
||||
#include <linux/module.h>
|
||||
|
||||
#include <xen/xenbus.h>
|
||||
#include <xen/grant_table.h>
|
||||
#include <xen/events.h>
|
||||
#include <xen/page.h>
|
||||
|
||||
#include <xen/interface/grant_table.h>
|
||||
#include <xen/interface/io/blkif.h>
|
||||
|
||||
#include <asm/xen/hypervisor.h>
|
||||
|
||||
enum blkif_state {
|
||||
BLKIF_STATE_DISCONNECTED,
|
||||
BLKIF_STATE_CONNECTED,
|
||||
BLKIF_STATE_SUSPENDED,
|
||||
};
|
||||
|
||||
struct blk_shadow {
|
||||
struct blkif_request req;
|
||||
unsigned long request;
|
||||
unsigned long frame[BLKIF_MAX_SEGMENTS_PER_REQUEST];
|
||||
};
|
||||
|
||||
static struct block_device_operations xlvbd_block_fops;
|
||||
|
||||
#define BLK_RING_SIZE __RING_SIZE((struct blkif_sring *)0, PAGE_SIZE)
|
||||
|
||||
/*
|
||||
* We have one of these per vbd, whether ide, scsi or 'other'. They
|
||||
* hang in private_data off the gendisk structure. We may end up
|
||||
* putting all kinds of interesting stuff here :-)
|
||||
*/
|
||||
struct blkfront_info
|
||||
{
|
||||
struct xenbus_device *xbdev;
|
||||
dev_t dev;
|
||||
struct gendisk *gd;
|
||||
int vdevice;
|
||||
blkif_vdev_t handle;
|
||||
enum blkif_state connected;
|
||||
int ring_ref;
|
||||
struct blkif_front_ring ring;
|
||||
unsigned int evtchn, irq;
|
||||
struct request_queue *rq;
|
||||
struct work_struct work;
|
||||
struct gnttab_free_callback callback;
|
||||
struct blk_shadow shadow[BLK_RING_SIZE];
|
||||
unsigned long shadow_free;
|
||||
int feature_barrier;
|
||||
|
||||
/**
|
||||
* The number of people holding this device open. We won't allow a
|
||||
* hot-unplug unless this is 0.
|
||||
*/
|
||||
int users;
|
||||
};
|
||||
|
||||
static DEFINE_SPINLOCK(blkif_io_lock);
|
||||
|
||||
#define MAXIMUM_OUTSTANDING_BLOCK_REQS \
|
||||
(BLKIF_MAX_SEGMENTS_PER_REQUEST * BLK_RING_SIZE)
|
||||
#define GRANT_INVALID_REF 0
|
||||
|
||||
#define PARTS_PER_DISK 16
|
||||
|
||||
#define BLKIF_MAJOR(dev) ((dev)>>8)
|
||||
#define BLKIF_MINOR(dev) ((dev) & 0xff)
|
||||
|
||||
#define DEV_NAME "xvd" /* name in /dev */
|
||||
|
||||
/* Information about our VBDs. */
|
||||
#define MAX_VBDS 64
|
||||
static LIST_HEAD(vbds_list);
|
||||
|
||||
static int get_id_from_freelist(struct blkfront_info *info)
|
||||
{
|
||||
unsigned long free = info->shadow_free;
|
||||
BUG_ON(free > BLK_RING_SIZE);
|
||||
info->shadow_free = info->shadow[free].req.id;
|
||||
info->shadow[free].req.id = 0x0fffffee; /* debug */
|
||||
return free;
|
||||
}
|
||||
|
||||
static void add_id_to_freelist(struct blkfront_info *info,
|
||||
unsigned long id)
|
||||
{
|
||||
info->shadow[id].req.id = info->shadow_free;
|
||||
info->shadow[id].request = 0;
|
||||
info->shadow_free = id;
|
||||
}
|
||||
|
||||
static void blkif_restart_queue_callback(void *arg)
|
||||
{
|
||||
struct blkfront_info *info = (struct blkfront_info *)arg;
|
||||
schedule_work(&info->work);
|
||||
}
|
||||
|
||||
/*
|
||||
* blkif_queue_request
|
||||
*
|
||||
* request block io
|
||||
*
|
||||
* id: for guest use only.
|
||||
* operation: BLKIF_OP_{READ,WRITE,PROBE}
|
||||
* buffer: buffer to read/write into. this should be a
|
||||
* virtual address in the guest os.
|
||||
*/
|
||||
static int blkif_queue_request(struct request *req)
|
||||
{
|
||||
struct blkfront_info *info = req->rq_disk->private_data;
|
||||
unsigned long buffer_mfn;
|
||||
struct blkif_request *ring_req;
|
||||
struct bio *bio;
|
||||
struct bio_vec *bvec;
|
||||
int idx;
|
||||
unsigned long id;
|
||||
unsigned int fsect, lsect;
|
||||
int ref;
|
||||
grant_ref_t gref_head;
|
||||
|
||||
if (unlikely(info->connected != BLKIF_STATE_CONNECTED))
|
||||
return 1;
|
||||
|
||||
if (gnttab_alloc_grant_references(
|
||||
BLKIF_MAX_SEGMENTS_PER_REQUEST, &gref_head) < 0) {
|
||||
gnttab_request_free_callback(
|
||||
&info->callback,
|
||||
blkif_restart_queue_callback,
|
||||
info,
|
||||
BLKIF_MAX_SEGMENTS_PER_REQUEST);
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* Fill out a communications ring structure. */
|
||||
ring_req = RING_GET_REQUEST(&info->ring, info->ring.req_prod_pvt);
|
||||
id = get_id_from_freelist(info);
|
||||
info->shadow[id].request = (unsigned long)req;
|
||||
|
||||
ring_req->id = id;
|
||||
ring_req->sector_number = (blkif_sector_t)req->sector;
|
||||
ring_req->handle = info->handle;
|
||||
|
||||
ring_req->operation = rq_data_dir(req) ?
|
||||
BLKIF_OP_WRITE : BLKIF_OP_READ;
|
||||
if (blk_barrier_rq(req))
|
||||
ring_req->operation = BLKIF_OP_WRITE_BARRIER;
|
||||
|
||||
ring_req->nr_segments = 0;
|
||||
rq_for_each_bio (bio, req) {
|
||||
bio_for_each_segment (bvec, bio, idx) {
|
||||
BUG_ON(ring_req->nr_segments
|
||||
== BLKIF_MAX_SEGMENTS_PER_REQUEST);
|
||||
buffer_mfn = pfn_to_mfn(page_to_pfn(bvec->bv_page));
|
||||
fsect = bvec->bv_offset >> 9;
|
||||
lsect = fsect + (bvec->bv_len >> 9) - 1;
|
||||
/* install a grant reference. */
|
||||
ref = gnttab_claim_grant_reference(&gref_head);
|
||||
BUG_ON(ref == -ENOSPC);
|
||||
|
||||
gnttab_grant_foreign_access_ref(
|
||||
ref,
|
||||
info->xbdev->otherend_id,
|
||||
buffer_mfn,
|
||||
rq_data_dir(req) );
|
||||
|
||||
info->shadow[id].frame[ring_req->nr_segments] =
|
||||
mfn_to_pfn(buffer_mfn);
|
||||
|
||||
ring_req->seg[ring_req->nr_segments] =
|
||||
(struct blkif_request_segment) {
|
||||
.gref = ref,
|
||||
.first_sect = fsect,
|
||||
.last_sect = lsect };
|
||||
|
||||
ring_req->nr_segments++;
|
||||
}
|
||||
}
|
||||
|
||||
info->ring.req_prod_pvt++;
|
||||
|
||||
/* Keep a private copy so we can reissue requests when recovering. */
|
||||
info->shadow[id].req = *ring_req;
|
||||
|
||||
gnttab_free_grant_references(gref_head);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
static inline void flush_requests(struct blkfront_info *info)
|
||||
{
|
||||
int notify;
|
||||
|
||||
RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&info->ring, notify);
|
||||
|
||||
if (notify)
|
||||
notify_remote_via_irq(info->irq);
|
||||
}
|
||||
|
||||
/*
|
||||
* do_blkif_request
|
||||
* read a block; request is in a request queue
|
||||
*/
|
||||
static void do_blkif_request(request_queue_t *rq)
|
||||
{
|
||||
struct blkfront_info *info = NULL;
|
||||
struct request *req;
|
||||
int queued;
|
||||
|
||||
pr_debug("Entered do_blkif_request\n");
|
||||
|
||||
queued = 0;
|
||||
|
||||
while ((req = elv_next_request(rq)) != NULL) {
|
||||
info = req->rq_disk->private_data;
|
||||
if (!blk_fs_request(req)) {
|
||||
end_request(req, 0);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (RING_FULL(&info->ring))
|
||||
goto wait;
|
||||
|
||||
pr_debug("do_blk_req %p: cmd %p, sec %lx, "
|
||||
"(%u/%li) buffer:%p [%s]\n",
|
||||
req, req->cmd, (unsigned long)req->sector,
|
||||
req->current_nr_sectors,
|
||||
req->nr_sectors, req->buffer,
|
||||
rq_data_dir(req) ? "write" : "read");
|
||||
|
||||
|
||||
blkdev_dequeue_request(req);
|
||||
if (blkif_queue_request(req)) {
|
||||
blk_requeue_request(rq, req);
|
||||
wait:
|
||||
/* Avoid pointless unplugs. */
|
||||
blk_stop_queue(rq);
|
||||
break;
|
||||
}
|
||||
|
||||
queued++;
|
||||
}
|
||||
|
||||
if (queued != 0)
|
||||
flush_requests(info);
|
||||
}
|
||||
|
||||
static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size)
|
||||
{
|
||||
request_queue_t *rq;
|
||||
|
||||
rq = blk_init_queue(do_blkif_request, &blkif_io_lock);
|
||||
if (rq == NULL)
|
||||
return -1;
|
||||
|
||||
elevator_init(rq, "noop");
|
||||
|
||||
/* Hard sector size and max sectors impersonate the equiv. hardware. */
|
||||
blk_queue_hardsect_size(rq, sector_size);
|
||||
blk_queue_max_sectors(rq, 512);
|
||||
|
||||
/* Each segment in a request is up to an aligned page in size. */
|
||||
blk_queue_segment_boundary(rq, PAGE_SIZE - 1);
|
||||
blk_queue_max_segment_size(rq, PAGE_SIZE);
|
||||
|
||||
/* Ensure a merged request will fit in a single I/O ring slot. */
|
||||
blk_queue_max_phys_segments(rq, BLKIF_MAX_SEGMENTS_PER_REQUEST);
|
||||
blk_queue_max_hw_segments(rq, BLKIF_MAX_SEGMENTS_PER_REQUEST);
|
||||
|
||||
/* Make sure buffer addresses are sector-aligned. */
|
||||
blk_queue_dma_alignment(rq, 511);
|
||||
|
||||
gd->queue = rq;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
static int xlvbd_barrier(struct blkfront_info *info)
|
||||
{
|
||||
int err;
|
||||
|
||||
err = blk_queue_ordered(info->rq,
|
||||
info->feature_barrier ? QUEUE_ORDERED_DRAIN : QUEUE_ORDERED_NONE,
|
||||
NULL);
|
||||
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
printk(KERN_INFO "blkfront: %s: barriers %s\n",
|
||||
info->gd->disk_name,
|
||||
info->feature_barrier ? "enabled" : "disabled");
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
static int xlvbd_alloc_gendisk(int minor, blkif_sector_t capacity,
|
||||
int vdevice, u16 vdisk_info, u16 sector_size,
|
||||
struct blkfront_info *info)
|
||||
{
|
||||
struct gendisk *gd;
|
||||
int nr_minors = 1;
|
||||
int err = -ENODEV;
|
||||
|
||||
BUG_ON(info->gd != NULL);
|
||||
BUG_ON(info->rq != NULL);
|
||||
|
||||
if ((minor % PARTS_PER_DISK) == 0)
|
||||
nr_minors = PARTS_PER_DISK;
|
||||
|
||||
gd = alloc_disk(nr_minors);
|
||||
if (gd == NULL)
|
||||
goto out;
|
||||
|
||||
if (nr_minors > 1)
|
||||
sprintf(gd->disk_name, "%s%c", DEV_NAME,
|
||||
'a' + minor / PARTS_PER_DISK);
|
||||
else
|
||||
sprintf(gd->disk_name, "%s%c%d", DEV_NAME,
|
||||
'a' + minor / PARTS_PER_DISK,
|
||||
minor % PARTS_PER_DISK);
|
||||
|
||||
gd->major = XENVBD_MAJOR;
|
||||
gd->first_minor = minor;
|
||||
gd->fops = &xlvbd_block_fops;
|
||||
gd->private_data = info;
|
||||
gd->driverfs_dev = &(info->xbdev->dev);
|
||||
set_capacity(gd, capacity);
|
||||
|
||||
if (xlvbd_init_blk_queue(gd, sector_size)) {
|
||||
del_gendisk(gd);
|
||||
goto out;
|
||||
}
|
||||
|
||||
info->rq = gd->queue;
|
||||
info->gd = gd;
|
||||
|
||||
if (info->feature_barrier)
|
||||
xlvbd_barrier(info);
|
||||
|
||||
if (vdisk_info & VDISK_READONLY)
|
||||
set_disk_ro(gd, 1);
|
||||
|
||||
if (vdisk_info & VDISK_REMOVABLE)
|
||||
gd->flags |= GENHD_FL_REMOVABLE;
|
||||
|
||||
if (vdisk_info & VDISK_CDROM)
|
||||
gd->flags |= GENHD_FL_CD;
|
||||
|
||||
return 0;
|
||||
|
||||
out:
|
||||
return err;
|
||||
}
|
||||
|
||||
static void kick_pending_request_queues(struct blkfront_info *info)
|
||||
{
|
||||
if (!RING_FULL(&info->ring)) {
|
||||
/* Re-enable calldowns. */
|
||||
blk_start_queue(info->rq);
|
||||
/* Kick things off immediately. */
|
||||
do_blkif_request(info->rq);
|
||||
}
|
||||
}
|
||||
|
||||
static void blkif_restart_queue(struct work_struct *work)
|
||||
{
|
||||
struct blkfront_info *info = container_of(work, struct blkfront_info, work);
|
||||
|
||||
spin_lock_irq(&blkif_io_lock);
|
||||
if (info->connected == BLKIF_STATE_CONNECTED)
|
||||
kick_pending_request_queues(info);
|
||||
spin_unlock_irq(&blkif_io_lock);
|
||||
}
|
||||
|
||||
static void blkif_free(struct blkfront_info *info, int suspend)
|
||||
{
|
||||
/* Prevent new requests being issued until we fix things up. */
|
||||
spin_lock_irq(&blkif_io_lock);
|
||||
info->connected = suspend ?
|
||||
BLKIF_STATE_SUSPENDED : BLKIF_STATE_DISCONNECTED;
|
||||
/* No more blkif_request(). */
|
||||
if (info->rq)
|
||||
blk_stop_queue(info->rq);
|
||||
/* No more gnttab callback work. */
|
||||
gnttab_cancel_free_callback(&info->callback);
|
||||
spin_unlock_irq(&blkif_io_lock);
|
||||
|
||||
/* Flush gnttab callback work. Must be done with no locks held. */
|
||||
flush_scheduled_work();
|
||||
|
||||
/* Free resources associated with old device channel. */
|
||||
if (info->ring_ref != GRANT_INVALID_REF) {
|
||||
gnttab_end_foreign_access(info->ring_ref, 0,
|
||||
(unsigned long)info->ring.sring);
|
||||
info->ring_ref = GRANT_INVALID_REF;
|
||||
info->ring.sring = NULL;
|
||||
}
|
||||
if (info->irq)
|
||||
unbind_from_irqhandler(info->irq, info);
|
||||
info->evtchn = info->irq = 0;
|
||||
|
||||
}
|
||||
|
||||
static void blkif_completion(struct blk_shadow *s)
|
||||
{
|
||||
int i;
|
||||
for (i = 0; i < s->req.nr_segments; i++)
|
||||
gnttab_end_foreign_access(s->req.seg[i].gref, 0, 0UL);
|
||||
}
|
||||
|
||||
static irqreturn_t blkif_interrupt(int irq, void *dev_id)
|
||||
{
|
||||
struct request *req;
|
||||
struct blkif_response *bret;
|
||||
RING_IDX i, rp;
|
||||
unsigned long flags;
|
||||
struct blkfront_info *info = (struct blkfront_info *)dev_id;
|
||||
int uptodate;
|
||||
|
||||
spin_lock_irqsave(&blkif_io_lock, flags);
|
||||
|
||||
if (unlikely(info->connected != BLKIF_STATE_CONNECTED)) {
|
||||
spin_unlock_irqrestore(&blkif_io_lock, flags);
|
||||
return IRQ_HANDLED;
|
||||
}
|
||||
|
||||
again:
|
||||
rp = info->ring.sring->rsp_prod;
|
||||
rmb(); /* Ensure we see queued responses up to 'rp'. */
|
||||
|
||||
for (i = info->ring.rsp_cons; i != rp; i++) {
|
||||
unsigned long id;
|
||||
int ret;
|
||||
|
||||
bret = RING_GET_RESPONSE(&info->ring, i);
|
||||
id = bret->id;
|
||||
req = (struct request *)info->shadow[id].request;
|
||||
|
||||
blkif_completion(&info->shadow[id]);
|
||||
|
||||
add_id_to_freelist(info, id);
|
||||
|
||||
uptodate = (bret->status == BLKIF_RSP_OKAY);
|
||||
switch (bret->operation) {
|
||||
case BLKIF_OP_WRITE_BARRIER:
|
||||
if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) {
|
||||
printk(KERN_WARNING "blkfront: %s: write barrier op failed\n",
|
||||
info->gd->disk_name);
|
||||
uptodate = -EOPNOTSUPP;
|
||||
info->feature_barrier = 0;
|
||||
xlvbd_barrier(info);
|
||||
}
|
||||
/* fall through */
|
||||
case BLKIF_OP_READ:
|
||||
case BLKIF_OP_WRITE:
|
||||
if (unlikely(bret->status != BLKIF_RSP_OKAY))
|
||||
dev_dbg(&info->xbdev->dev, "Bad return from blkdev data "
|
||||
"request: %x\n", bret->status);
|
||||
|
||||
ret = end_that_request_first(req, uptodate,
|
||||
req->hard_nr_sectors);
|
||||
BUG_ON(ret);
|
||||
end_that_request_last(req, uptodate);
|
||||
break;
|
||||
default:
|
||||
BUG();
|
||||
}
|
||||
}
|
||||
|
||||
info->ring.rsp_cons = i;
|
||||
|
||||
if (i != info->ring.req_prod_pvt) {
|
||||
int more_to_do;
|
||||
RING_FINAL_CHECK_FOR_RESPONSES(&info->ring, more_to_do);
|
||||
if (more_to_do)
|
||||
goto again;
|
||||
} else
|
||||
info->ring.sring->rsp_event = i + 1;
|
||||
|
||||
kick_pending_request_queues(info);
|
||||
|
||||
spin_unlock_irqrestore(&blkif_io_lock, flags);
|
||||
|
||||
return IRQ_HANDLED;
|
||||
}
|
||||
|
||||
|
||||
static int setup_blkring(struct xenbus_device *dev,
|
||||
struct blkfront_info *info)
|
||||
{
|
||||
struct blkif_sring *sring;
|
||||
int err;
|
||||
|
||||
info->ring_ref = GRANT_INVALID_REF;
|
||||
|
||||
sring = (struct blkif_sring *)__get_free_page(GFP_KERNEL);
|
||||
if (!sring) {
|
||||
xenbus_dev_fatal(dev, -ENOMEM, "allocating shared ring");
|
||||
return -ENOMEM;
|
||||
}
|
||||
SHARED_RING_INIT(sring);
|
||||
FRONT_RING_INIT(&info->ring, sring, PAGE_SIZE);
|
||||
|
||||
err = xenbus_grant_ring(dev, virt_to_mfn(info->ring.sring));
|
||||
if (err < 0) {
|
||||
free_page((unsigned long)sring);
|
||||
info->ring.sring = NULL;
|
||||
goto fail;
|
||||
}
|
||||
info->ring_ref = err;
|
||||
|
||||
err = xenbus_alloc_evtchn(dev, &info->evtchn);
|
||||
if (err)
|
||||
goto fail;
|
||||
|
||||
err = bind_evtchn_to_irqhandler(info->evtchn,
|
||||
blkif_interrupt,
|
||||
IRQF_SAMPLE_RANDOM, "blkif", info);
|
||||
if (err <= 0) {
|
||||
xenbus_dev_fatal(dev, err,
|
||||
"bind_evtchn_to_irqhandler failed");
|
||||
goto fail;
|
||||
}
|
||||
info->irq = err;
|
||||
|
||||
return 0;
|
||||
fail:
|
||||
blkif_free(info, 0);
|
||||
return err;
|
||||
}
|
||||
|
||||
|
||||
/* Common code used when first setting up, and when resuming. */
|
||||
static int talk_to_backend(struct xenbus_device *dev,
|
||||
struct blkfront_info *info)
|
||||
{
|
||||
const char *message = NULL;
|
||||
struct xenbus_transaction xbt;
|
||||
int err;
|
||||
|
||||
/* Create shared ring, alloc event channel. */
|
||||
err = setup_blkring(dev, info);
|
||||
if (err)
|
||||
goto out;
|
||||
|
||||
again:
|
||||
err = xenbus_transaction_start(&xbt);
|
||||
if (err) {
|
||||
xenbus_dev_fatal(dev, err, "starting transaction");
|
||||
goto destroy_blkring;
|
||||
}
|
||||
|
||||
err = xenbus_printf(xbt, dev->nodename,
|
||||
"ring-ref", "%u", info->ring_ref);
|
||||
if (err) {
|
||||
message = "writing ring-ref";
|
||||
goto abort_transaction;
|
||||
}
|
||||
err = xenbus_printf(xbt, dev->nodename,
|
||||
"event-channel", "%u", info->evtchn);
|
||||
if (err) {
|
||||
message = "writing event-channel";
|
||||
goto abort_transaction;
|
||||
}
|
||||
|
||||
err = xenbus_transaction_end(xbt, 0);
|
||||
if (err) {
|
||||
if (err == -EAGAIN)
|
||||
goto again;
|
||||
xenbus_dev_fatal(dev, err, "completing transaction");
|
||||
goto destroy_blkring;
|
||||
}
|
||||
|
||||
xenbus_switch_state(dev, XenbusStateInitialised);
|
||||
|
||||
return 0;
|
||||
|
||||
abort_transaction:
|
||||
xenbus_transaction_end(xbt, 1);
|
||||
if (message)
|
||||
xenbus_dev_fatal(dev, err, "%s", message);
|
||||
destroy_blkring:
|
||||
blkif_free(info, 0);
|
||||
out:
|
||||
return err;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Entry point to this code when a new device is created. Allocate the basic
|
||||
* structures and the ring buffer for communication with the backend, and
|
||||
* inform the backend of the appropriate details for those. Switch to
|
||||
* Initialised state.
|
||||
*/
|
||||
static int blkfront_probe(struct xenbus_device *dev,
|
||||
const struct xenbus_device_id *id)
|
||||
{
|
||||
int err, vdevice, i;
|
||||
struct blkfront_info *info;
|
||||
|
||||
/* FIXME: Use dynamic device id if this is not set. */
|
||||
err = xenbus_scanf(XBT_NIL, dev->nodename,
|
||||
"virtual-device", "%i", &vdevice);
|
||||
if (err != 1) {
|
||||
xenbus_dev_fatal(dev, err, "reading virtual-device");
|
||||
return err;
|
||||
}
|
||||
|
||||
info = kzalloc(sizeof(*info), GFP_KERNEL);
|
||||
if (!info) {
|
||||
xenbus_dev_fatal(dev, -ENOMEM, "allocating info structure");
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
info->xbdev = dev;
|
||||
info->vdevice = vdevice;
|
||||
info->connected = BLKIF_STATE_DISCONNECTED;
|
||||
INIT_WORK(&info->work, blkif_restart_queue);
|
||||
|
||||
for (i = 0; i < BLK_RING_SIZE; i++)
|
||||
info->shadow[i].req.id = i+1;
|
||||
info->shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff;
|
||||
|
||||
/* Front end dir is a number, which is used as the id. */
|
||||
info->handle = simple_strtoul(strrchr(dev->nodename, '/')+1, NULL, 0);
|
||||
dev->dev.driver_data = info;
|
||||
|
||||
err = talk_to_backend(dev, info);
|
||||
if (err) {
|
||||
kfree(info);
|
||||
dev->dev.driver_data = NULL;
|
||||
return err;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
static int blkif_recover(struct blkfront_info *info)
|
||||
{
|
||||
int i;
|
||||
struct blkif_request *req;
|
||||
struct blk_shadow *copy;
|
||||
int j;
|
||||
|
||||
/* Stage 1: Make a safe copy of the shadow state. */
|
||||
copy = kmalloc(sizeof(info->shadow), GFP_KERNEL);
|
||||
if (!copy)
|
||||
return -ENOMEM;
|
||||
memcpy(copy, info->shadow, sizeof(info->shadow));
|
||||
|
||||
/* Stage 2: Set up free list. */
|
||||
memset(&info->shadow, 0, sizeof(info->shadow));
|
||||
for (i = 0; i < BLK_RING_SIZE; i++)
|
||||
info->shadow[i].req.id = i+1;
|
||||
info->shadow_free = info->ring.req_prod_pvt;
|
||||
info->shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff;
|
||||
|
||||
/* Stage 3: Find pending requests and requeue them. */
|
||||
for (i = 0; i < BLK_RING_SIZE; i++) {
|
||||
/* Not in use? */
|
||||
if (copy[i].request == 0)
|
||||
continue;
|
||||
|
||||
/* Grab a request slot and copy shadow state into it. */
|
||||
req = RING_GET_REQUEST(&info->ring, info->ring.req_prod_pvt);
|
||||
*req = copy[i].req;
|
||||
|
||||
/* We get a new request id, and must reset the shadow state. */
|
||||
req->id = get_id_from_freelist(info);
|
||||
memcpy(&info->shadow[req->id], ©[i], sizeof(copy[i]));
|
||||
|
||||
/* Rewrite any grant references invalidated by susp/resume. */
|
||||
for (j = 0; j < req->nr_segments; j++)
|
||||
gnttab_grant_foreign_access_ref(
|
||||
req->seg[j].gref,
|
||||
info->xbdev->otherend_id,
|
||||
pfn_to_mfn(info->shadow[req->id].frame[j]),
|
||||
rq_data_dir(
|
||||
(struct request *)
|
||||
info->shadow[req->id].request));
|
||||
info->shadow[req->id].req = *req;
|
||||
|
||||
info->ring.req_prod_pvt++;
|
||||
}
|
||||
|
||||
kfree(copy);
|
||||
|
||||
xenbus_switch_state(info->xbdev, XenbusStateConnected);
|
||||
|
||||
spin_lock_irq(&blkif_io_lock);
|
||||
|
||||
/* Now safe for us to use the shared ring */
|
||||
info->connected = BLKIF_STATE_CONNECTED;
|
||||
|
||||
/* Send off requeued requests */
|
||||
flush_requests(info);
|
||||
|
||||
/* Kick any other new requests queued since we resumed */
|
||||
kick_pending_request_queues(info);
|
||||
|
||||
spin_unlock_irq(&blkif_io_lock);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* We are reconnecting to the backend, due to a suspend/resume, or a backend
|
||||
* driver restart. We tear down our blkif structure and recreate it, but
|
||||
* leave the device-layer structures intact so that this is transparent to the
|
||||
* rest of the kernel.
|
||||
*/
|
||||
static int blkfront_resume(struct xenbus_device *dev)
|
||||
{
|
||||
struct blkfront_info *info = dev->dev.driver_data;
|
||||
int err;
|
||||
|
||||
dev_dbg(&dev->dev, "blkfront_resume: %s\n", dev->nodename);
|
||||
|
||||
blkif_free(info, info->connected == BLKIF_STATE_CONNECTED);
|
||||
|
||||
err = talk_to_backend(dev, info);
|
||||
if (info->connected == BLKIF_STATE_SUSPENDED && !err)
|
||||
err = blkif_recover(info);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Invoked when the backend is finally 'ready' (and has told us
|
||||
* the details about the physical device - #sectors, size, etc).
|
||||
*/
|
||||
static void blkfront_connect(struct blkfront_info *info)
|
||||
{
|
||||
unsigned long long sectors;
|
||||
unsigned long sector_size;
|
||||
unsigned int binfo;
|
||||
int err;
|
||||
|
||||
if ((info->connected == BLKIF_STATE_CONNECTED) ||
|
||||
(info->connected == BLKIF_STATE_SUSPENDED) )
|
||||
return;
|
||||
|
||||
dev_dbg(&info->xbdev->dev, "%s:%s.\n",
|
||||
__func__, info->xbdev->otherend);
|
||||
|
||||
err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
|
||||
"sectors", "%llu", §ors,
|
||||
"info", "%u", &binfo,
|
||||
"sector-size", "%lu", §or_size,
|
||||
NULL);
|
||||
if (err) {
|
||||
xenbus_dev_fatal(info->xbdev, err,
|
||||
"reading backend fields at %s",
|
||||
info->xbdev->otherend);
|
||||
return;
|
||||
}
|
||||
|
||||
err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
|
||||
"feature-barrier", "%lu", &info->feature_barrier,
|
||||
NULL);
|
||||
if (err)
|
||||
info->feature_barrier = 0;
|
||||
|
||||
err = xlvbd_alloc_gendisk(BLKIF_MINOR(info->vdevice),
|
||||
sectors, info->vdevice,
|
||||
binfo, sector_size, info);
|
||||
if (err) {
|
||||
xenbus_dev_fatal(info->xbdev, err, "xlvbd_add at %s",
|
||||
info->xbdev->otherend);
|
||||
return;
|
||||
}
|
||||
|
||||
xenbus_switch_state(info->xbdev, XenbusStateConnected);
|
||||
|
||||
/* Kick pending requests. */
|
||||
spin_lock_irq(&blkif_io_lock);
|
||||
info->connected = BLKIF_STATE_CONNECTED;
|
||||
kick_pending_request_queues(info);
|
||||
spin_unlock_irq(&blkif_io_lock);
|
||||
|
||||
add_disk(info->gd);
|
||||
}
|
||||
|
||||
/**
|
||||
* Handle the change of state of the backend to Closing. We must delete our
|
||||
* device-layer structures now, to ensure that writes are flushed through to
|
||||
* the backend. Once this is done, we can switch to Closed in
|
||||
* acknowledgement.
|
||||
*/
|
||||
static void blkfront_closing(struct xenbus_device *dev)
|
||||
{
|
||||
struct blkfront_info *info = dev->dev.driver_data;
|
||||
unsigned long flags;
|
||||
|
||||
dev_dbg(&dev->dev, "blkfront_closing: %s removed\n", dev->nodename);
|
||||
|
||||
if (info->rq == NULL)
|
||||
goto out;
|
||||
|
||||
spin_lock_irqsave(&blkif_io_lock, flags);
|
||||
|
||||
del_gendisk(info->gd);
|
||||
|
||||
/* No more blkif_request(). */
|
||||
blk_stop_queue(info->rq);
|
||||
|
||||
/* No more gnttab callback work. */
|
||||
gnttab_cancel_free_callback(&info->callback);
|
||||
spin_unlock_irqrestore(&blkif_io_lock, flags);
|
||||
|
||||
/* Flush gnttab callback work. Must be done with no locks held. */
|
||||
flush_scheduled_work();
|
||||
|
||||
blk_cleanup_queue(info->rq);
|
||||
info->rq = NULL;
|
||||
|
||||
out:
|
||||
xenbus_frontend_closed(dev);
|
||||
}
|
||||
|
||||
/**
|
||||
* Callback received when the backend's state changes.
|
||||
*/
|
||||
static void backend_changed(struct xenbus_device *dev,
|
||||
enum xenbus_state backend_state)
|
||||
{
|
||||
struct blkfront_info *info = dev->dev.driver_data;
|
||||
struct block_device *bd;
|
||||
|
||||
dev_dbg(&dev->dev, "blkfront:backend_changed.\n");
|
||||
|
||||
switch (backend_state) {
|
||||
case XenbusStateInitialising:
|
||||
case XenbusStateInitWait:
|
||||
case XenbusStateInitialised:
|
||||
case XenbusStateUnknown:
|
||||
case XenbusStateClosed:
|
||||
break;
|
||||
|
||||
case XenbusStateConnected:
|
||||
blkfront_connect(info);
|
||||
break;
|
||||
|
||||
case XenbusStateClosing:
|
||||
bd = bdget(info->dev);
|
||||
if (bd == NULL)
|
||||
xenbus_dev_fatal(dev, -ENODEV, "bdget failed");
|
||||
|
||||
mutex_lock(&bd->bd_mutex);
|
||||
if (info->users > 0)
|
||||
xenbus_dev_error(dev, -EBUSY,
|
||||
"Device in use; refusing to close");
|
||||
else
|
||||
blkfront_closing(dev);
|
||||
mutex_unlock(&bd->bd_mutex);
|
||||
bdput(bd);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
static int blkfront_remove(struct xenbus_device *dev)
|
||||
{
|
||||
struct blkfront_info *info = dev->dev.driver_data;
|
||||
|
||||
dev_dbg(&dev->dev, "blkfront_remove: %s removed\n", dev->nodename);
|
||||
|
||||
blkif_free(info, 0);
|
||||
|
||||
kfree(info);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int blkif_open(struct inode *inode, struct file *filep)
|
||||
{
|
||||
struct blkfront_info *info = inode->i_bdev->bd_disk->private_data;
|
||||
info->users++;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int blkif_release(struct inode *inode, struct file *filep)
|
||||
{
|
||||
struct blkfront_info *info = inode->i_bdev->bd_disk->private_data;
|
||||
info->users--;
|
||||
if (info->users == 0) {
|
||||
/* Check whether we have been instructed to close. We will
|
||||
have ignored this request initially, as the device was
|
||||
still mounted. */
|
||||
struct xenbus_device *dev = info->xbdev;
|
||||
enum xenbus_state state = xenbus_read_driver_state(dev->otherend);
|
||||
|
||||
if (state == XenbusStateClosing)
|
||||
blkfront_closing(dev);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static struct block_device_operations xlvbd_block_fops =
|
||||
{
|
||||
.owner = THIS_MODULE,
|
||||
.open = blkif_open,
|
||||
.release = blkif_release,
|
||||
};
|
||||
|
||||
|
||||
static struct xenbus_device_id blkfront_ids[] = {
|
||||
{ "vbd" },
|
||||
{ "" }
|
||||
};
|
||||
|
||||
static struct xenbus_driver blkfront = {
|
||||
.name = "vbd",
|
||||
.owner = THIS_MODULE,
|
||||
.ids = blkfront_ids,
|
||||
.probe = blkfront_probe,
|
||||
.remove = blkfront_remove,
|
||||
.resume = blkfront_resume,
|
||||
.otherend_changed = backend_changed,
|
||||
};
|
||||
|
||||
static int __init xlblk_init(void)
|
||||
{
|
||||
if (!is_running_on_xen())
|
||||
return -ENODEV;
|
||||
|
||||
if (register_blkdev(XENVBD_MAJOR, DEV_NAME)) {
|
||||
printk(KERN_WARNING "xen_blk: can't get major %d with name %s\n",
|
||||
XENVBD_MAJOR, DEV_NAME);
|
||||
return -ENODEV;
|
||||
}
|
||||
|
||||
return xenbus_register_frontend(&blkfront);
|
||||
}
|
||||
module_init(xlblk_init);
|
||||
|
||||
|
||||
static void xlblk_exit(void)
|
||||
{
|
||||
return xenbus_unregister_driver(&blkfront);
|
||||
}
|
||||
module_exit(xlblk_exit);
|
||||
|
||||
MODULE_DESCRIPTION("Xen virtual block device frontend");
|
||||
MODULE_LICENSE("GPL");
|
||||
MODULE_ALIAS_BLOCKDEV_MAJOR(XENVBD_MAJOR);
|
|
@ -604,6 +604,14 @@ config HVC_BEAT
help
Toshiba's Cell Reference Set Beat Console device driver

config HVC_XEN
bool "Xen Hypervisor Console support"
depends on XEN
select HVC_DRIVER
default y
help
Xen virtual console device driver

config HVCS
tristate "IBM Hypervisor Virtual Console Server support"
depends on PPC_PSERIES
@ -48,6 +48,7 @@ obj-$(CONFIG_HVC_ISERIES) += hvc_iseries.o
obj-$(CONFIG_HVC_RTAS) += hvc_rtas.o
obj-$(CONFIG_HVC_BEAT) += hvc_beat.o
obj-$(CONFIG_HVC_DRIVER) += hvc_console.o
obj-$(CONFIG_HVC_XEN) += hvc_xen.o
obj-$(CONFIG_RAW_DRIVER) += raw.o
obj-$(CONFIG_SGI_SNSC) += snsc.o snsc_event.o
obj-$(CONFIG_MSPEC) += mspec.o
@ -0,0 +1,159 @@
|
|||
/*
|
||||
* xen console driver interface to hvc_console.c
|
||||
*
|
||||
* (c) 2007 Gerd Hoffmann <kraxel@suse.de>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
*/
|
||||
|
||||
#include <linux/console.h>
|
||||
#include <linux/delay.h>
|
||||
#include <linux/err.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/types.h>
|
||||
|
||||
#include <asm/xen/hypervisor.h>
|
||||
#include <xen/page.h>
|
||||
#include <xen/events.h>
|
||||
#include <xen/interface/io/console.h>
|
||||
#include <xen/hvc-console.h>
|
||||
|
||||
#include "hvc_console.h"
|
||||
|
||||
#define HVC_COOKIE 0x58656e /* "Xen" in hex */
|
||||
|
||||
static struct hvc_struct *hvc;
|
||||
static int xencons_irq;
|
||||
|
||||
/* ------------------------------------------------------------------ */
|
||||
|
||||
static inline struct xencons_interface *xencons_interface(void)
|
||||
{
|
||||
return mfn_to_virt(xen_start_info->console.domU.mfn);
|
||||
}
|
||||
|
||||
static inline void notify_daemon(void)
|
||||
{
|
||||
/* Use evtchn: this is called early, before irq is set up. */
|
||||
notify_remote_via_evtchn(xen_start_info->console.domU.evtchn);
|
||||
}
|
||||
|
||||
static int write_console(uint32_t vtermno, const char *data, int len)
|
||||
{
|
||||
struct xencons_interface *intf = xencons_interface();
|
||||
XENCONS_RING_IDX cons, prod;
|
||||
int sent = 0;
|
||||
|
||||
cons = intf->out_cons;
|
||||
prod = intf->out_prod;
|
||||
mb(); /* update queue values before going on */
|
||||
BUG_ON((prod - cons) > sizeof(intf->out));
|
||||
|
||||
while ((sent < len) && ((prod - cons) < sizeof(intf->out)))
|
||||
intf->out[MASK_XENCONS_IDX(prod++, intf->out)] = data[sent++];
|
||||
|
||||
wmb(); /* write ring before updating pointer */
|
||||
intf->out_prod = prod;
|
||||
|
||||
notify_daemon();
|
||||
return sent;
|
||||
}
|
||||
|
||||
static int read_console(uint32_t vtermno, char *buf, int len)
|
||||
{
|
||||
struct xencons_interface *intf = xencons_interface();
|
||||
XENCONS_RING_IDX cons, prod;
|
||||
int recv = 0;
|
||||
|
||||
cons = intf->in_cons;
|
||||
prod = intf->in_prod;
|
||||
mb(); /* get pointers before reading ring */
|
||||
BUG_ON((prod - cons) > sizeof(intf->in));
|
||||
|
||||
while (cons != prod && recv < len)
|
||||
buf[recv++] = intf->in[MASK_XENCONS_IDX(cons++, intf->in)];
|
||||
|
||||
mb(); /* read ring before consuming */
|
||||
intf->in_cons = cons;
|
||||
|
||||
notify_daemon();
|
||||
return recv;
|
||||
}
|
||||
|
||||
static struct hv_ops hvc_ops = {
|
||||
.get_chars = read_console,
|
||||
.put_chars = write_console,
|
||||
};
|
||||
|
||||
static int __init xen_init(void)
|
||||
{
|
||||
struct hvc_struct *hp;
|
||||
|
||||
if (!is_running_on_xen())
|
||||
return 0;
|
||||
|
||||
xencons_irq = bind_evtchn_to_irq(xen_start_info->console.domU.evtchn);
|
||||
if (xencons_irq < 0)
|
||||
xencons_irq = 0 /* NO_IRQ */;
|
||||
hp = hvc_alloc(HVC_COOKIE, xencons_irq, &hvc_ops, 256);
|
||||
if (IS_ERR(hp))
|
||||
return PTR_ERR(hp);
|
||||
|
||||
hvc = hp;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void __exit xen_fini(void)
|
||||
{
|
||||
if (hvc)
|
||||
hvc_remove(hvc);
|
||||
}
|
||||
|
||||
static int xen_cons_init(void)
|
||||
{
|
||||
if (!is_running_on_xen())
|
||||
return 0;
|
||||
|
||||
hvc_instantiate(HVC_COOKIE, 0, &hvc_ops);
|
||||
return 0;
|
||||
}
|
||||
|
||||
module_init(xen_init);
|
||||
module_exit(xen_fini);
|
||||
console_initcall(xen_cons_init);
|
||||
|
||||
static void xenboot_write_console(struct console *console, const char *string,
|
||||
unsigned len)
|
||||
{
|
||||
unsigned int linelen, off = 0;
|
||||
const char *pos;
|
||||
|
||||
while (off < len && NULL != (pos = strchr(string+off, '\n'))) {
|
||||
linelen = pos-string+off;
|
||||
if (off + linelen > len)
|
||||
break;
|
||||
write_console(0, string+off, linelen);
|
||||
write_console(0, "\r\n", 2);
|
||||
off += linelen + 1;
|
||||
}
|
||||
if (off < len)
|
||||
write_console(0, string+off, len-off);
|
||||
}
|
||||
|
||||
struct console xenboot_console = {
|
||||
.name = "xenboot",
|
||||
.write = xenboot_write_console,
|
||||
.flags = CON_PRINTBUFFER | CON_BOOT,
|
||||
};
|
|
@ -1770,7 +1770,8 @@ static int call_critical_overtemp(void)
"PATH=/sbin:/usr/sbin:/bin:/usr/bin",
NULL };

return call_usermodehelper(critical_overtemp_path, argv, envp, 0);
return call_usermodehelper(critical_overtemp_path,
argv, envp, UMH_WAIT_EXEC);
}

@ -80,7 +80,8 @@ int wf_critical_overtemp(void)
"PATH=/sbin:/usr/sbin:/bin:/usr/bin",
NULL };

return call_usermodehelper(critical_overtemp_path, argv, envp, 0);
return call_usermodehelper(critical_overtemp_path,
argv, envp, UMH_WAIT_EXEC);
}
EXPORT_SYMBOL_GPL(wf_critical_overtemp);

@ -2486,6 +2486,18 @@ source "drivers/atm/Kconfig"

source "drivers/s390/net/Kconfig"

config XEN_NETDEV_FRONTEND
tristate "Xen network device frontend driver"
depends on XEN
default y
help
The network device frontend driver allows the kernel to
access network devices exported by a virtual
machine containing a physical network device driver. The
frontend driver is intended for unprivileged guest domains;
if you are compiling a kernel for a Xen guest, you almost
certainly want to enable this.

config ISERIES_VETH
tristate "iSeries Virtual Ethernet driver support"
depends on PPC_ISERIES
@ -127,6 +127,8 @@ obj-$(CONFIG_PPPOL2TP) += pppox.o pppol2tp.o
obj-$(CONFIG_SLIP) += slip.o
obj-$(CONFIG_SLHC) += slhc.o

obj-$(CONFIG_XEN_NETDEV_FRONTEND) += xen-netfront.o

obj-$(CONFIG_DUMMY) += dummy.o
obj-$(CONFIG_IFB) += ifb.o
obj-$(CONFIG_MACVLAN) += macvlan.o
@ -320,7 +320,7 @@ static int eppconfig(struct baycom_state *bc)
sprintf(portarg, "%ld", bc->pdev->port->base);
printk(KERN_DEBUG "%s: %s -s -p %s -m %s\n", bc_drvname, eppconfig_path, portarg, modearg);

return call_usermodehelper(eppconfig_path, argv, envp, 1);
return call_usermodehelper(eppconfig_path, argv, envp, UMH_WAIT_PROC);
}

/* ---------------------------------------------------------------------- */
File diff suppressed because it is too large
@ -147,7 +147,7 @@ static int pnp_dock_event(int dock, struct pnp_docking_station_info *info)
info->location_id, info->serial, info->capabilities);
envp[i] = NULL;

value = call_usermodehelper (argv [0], argv, envp, 0);
value = call_usermodehelper (argv [0], argv, envp, UMH_WAIT_EXEC);
kfree (buf);
kfree (envp);
return 0;
@ -7,6 +7,7 @@
#include <linux/kthread.h>
#include <linux/delay.h>
#include <linux/kmod.h>
#include <linux/reboot.h>
#include <asm/oplib.h>
#include <asm/ebus.h>

@ -170,8 +171,6 @@ static void get_current_temps(struct bbc_cpu_temperature *tp)
static void do_envctrl_shutdown(struct bbc_cpu_temperature *tp)
{
static int shutting_down = 0;
static char *envp[] = { "HOME=/", "TERM=linux", "PATH=/sbin:/usr/sbin:/bin:/usr/bin", NULL };
char *argv[] = { "/sbin/shutdown", "-h", "now", NULL };
char *type = "???";
s8 val = -1;

@ -195,7 +194,7 @@ static void do_envctrl_shutdown(struct bbc_cpu_temperature *tp)
printk(KERN_CRIT "kenvctrld: Shutting down the system now.\n");

shutting_down = 1;
if (call_usermodehelper("/sbin/shutdown", argv, envp, 0) < 0)
if (orderly_poweroff(true) < 0)
printk(KERN_CRIT "envctrl: shutdown execution failed\n");
}

@ -26,6 +26,7 @@
#include <linux/ioport.h>
#include <linux/miscdevice.h>
#include <linux/kmod.h>
#include <linux/reboot.h>

#include <asm/ebus.h>
#include <asm/uaccess.h>
@ -966,10 +967,6 @@ static struct i2c_child_t *envctrl_get_i2c_child(unsigned char mon_type)
static void envctrl_do_shutdown(void)
{
static int inprog = 0;
static char *envp[] = {
"HOME=/", "TERM=linux", "PATH=/sbin:/usr/sbin:/bin:/usr/bin", NULL };
char *argv[] = {
"/sbin/shutdown", "-h", "now", NULL };
int ret;

if (inprog != 0)
@ -977,7 +974,7 @@ static void envctrl_do_shutdown(void)

inprog = 1;
printk(KERN_CRIT "kenvctrld: WARNING: Shutting down the system now.\n");
ret = call_usermodehelper("/sbin/shutdown", argv, envp, 0);
ret = orderly_poweroff(true);
if (ret < 0) {
printk(KERN_CRIT "kenvctrld: WARNING: system shutdown failed!\n");
inprog = 0; /* unlikely to succeed, but we could try again */
@ -0,0 +1,2 @@
obj-y += grant-table.o
obj-y += xenbus/
@ -0,0 +1,582 @@
|
|||
/******************************************************************************
|
||||
* grant_table.c
|
||||
*
|
||||
* Granting foreign access to our memory reservation.
|
||||
*
|
||||
* Copyright (c) 2005-2006, Christopher Clark
|
||||
* Copyright (c) 2004-2005, K A Fraser
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License version 2
|
||||
* as published by the Free Software Foundation; or, when distributed
|
||||
* separately from the Linux kernel or incorporated into other
|
||||
* software packages, subject to the following license:
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this source file (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use, copy, modify,
|
||||
* merge, publish, distribute, sublicense, and/or sell copies of the Software,
|
||||
* and to permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <linux/module.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/mm.h>
|
||||
#include <linux/vmalloc.h>
|
||||
#include <linux/uaccess.h>
|
||||
|
||||
#include <xen/interface/xen.h>
|
||||
#include <xen/page.h>
|
||||
#include <xen/grant_table.h>
|
||||
|
||||
#include <asm/pgtable.h>
|
||||
#include <asm/sync_bitops.h>
|
||||
|
||||
|
||||
/* External tools reserve first few grant table entries. */
|
||||
#define NR_RESERVED_ENTRIES 8
|
||||
#define GNTTAB_LIST_END 0xffffffff
|
||||
#define GREFS_PER_GRANT_FRAME (PAGE_SIZE / sizeof(struct grant_entry))
|
||||
|
||||
static grant_ref_t **gnttab_list;
|
||||
static unsigned int nr_grant_frames;
|
||||
static unsigned int boot_max_nr_grant_frames;
|
||||
static int gnttab_free_count;
|
||||
static grant_ref_t gnttab_free_head;
|
||||
static DEFINE_SPINLOCK(gnttab_list_lock);
|
||||
|
||||
static struct grant_entry *shared;
|
||||
|
||||
static struct gnttab_free_callback *gnttab_free_callback_list;
|
||||
|
||||
static int gnttab_expand(unsigned int req_entries);
|
||||
|
||||
#define RPP (PAGE_SIZE / sizeof(grant_ref_t))
|
||||
|
||||
static inline grant_ref_t *__gnttab_entry(grant_ref_t entry)
|
||||
{
|
||||
return &gnttab_list[(entry) / RPP][(entry) % RPP];
|
||||
}
|
||||
/* This can be used as an l-value */
|
||||
#define gnttab_entry(entry) (*__gnttab_entry(entry))
|
||||
|
||||
static int get_free_entries(unsigned count)
|
||||
{
|
||||
unsigned long flags;
|
||||
int ref, rc;
|
||||
grant_ref_t head;
|
||||
|
||||
spin_lock_irqsave(&gnttab_list_lock, flags);
|
||||
|
||||
if ((gnttab_free_count < count) &&
|
||||
((rc = gnttab_expand(count - gnttab_free_count)) < 0)) {
|
||||
spin_unlock_irqrestore(&gnttab_list_lock, flags);
|
||||
return rc;
|
||||
}
|
||||
|
||||
ref = head = gnttab_free_head;
|
||||
gnttab_free_count -= count;
|
||||
while (count-- > 1)
|
||||
head = gnttab_entry(head);
|
||||
gnttab_free_head = gnttab_entry(head);
|
||||
gnttab_entry(head) = GNTTAB_LIST_END;
|
||||
|
||||
spin_unlock_irqrestore(&gnttab_list_lock, flags);
|
||||
|
||||
return ref;
|
||||
}
|
||||
|
||||
static void do_free_callbacks(void)
|
||||
{
|
||||
struct gnttab_free_callback *callback, *next;
|
||||
|
||||
callback = gnttab_free_callback_list;
|
||||
gnttab_free_callback_list = NULL;
|
||||
|
||||
while (callback != NULL) {
|
||||
next = callback->next;
|
||||
if (gnttab_free_count >= callback->count) {
|
||||
callback->next = NULL;
|
||||
callback->fn(callback->arg);
|
||||
} else {
|
||||
callback->next = gnttab_free_callback_list;
|
||||
gnttab_free_callback_list = callback;
|
||||
}
|
||||
callback = next;
|
||||
}
|
||||
}
|
||||
|
||||
static inline void check_free_callbacks(void)
|
||||
{
|
||||
if (unlikely(gnttab_free_callback_list))
|
||||
do_free_callbacks();
|
||||
}
|
||||
|
||||
static void put_free_entry(grant_ref_t ref)
|
||||
{
|
||||
unsigned long flags;
|
||||
spin_lock_irqsave(&gnttab_list_lock, flags);
|
||||
gnttab_entry(ref) = gnttab_free_head;
|
||||
gnttab_free_head = ref;
|
||||
gnttab_free_count++;
|
||||
check_free_callbacks();
|
||||
spin_unlock_irqrestore(&gnttab_list_lock, flags);
|
||||
}
|
||||
|
||||
static void update_grant_entry(grant_ref_t ref, domid_t domid,
|
||||
unsigned long frame, unsigned flags)
|
||||
{
|
||||
/*
|
||||
* Introducing a valid entry into the grant table:
|
||||
* 1. Write ent->domid.
|
||||
* 2. Write ent->frame:
|
||||
* GTF_permit_access: Frame to which access is permitted.
|
||||
* GTF_accept_transfer: Pseudo-phys frame slot being filled by new
|
||||
* frame, or zero if none.
|
||||
* 3. Write memory barrier (WMB).
|
||||
* 4. Write ent->flags, inc. valid type.
|
||||
*/
|
||||
shared[ref].frame = frame;
|
||||
shared[ref].domid = domid;
|
||||
wmb();
|
||||
shared[ref].flags = flags;
|
||||
}
|
||||
|
||||
/*
|
||||
* Public grant-issuing interface functions
|
||||
*/
|
||||
void gnttab_grant_foreign_access_ref(grant_ref_t ref, domid_t domid,
|
||||
unsigned long frame, int readonly)
|
||||
{
|
||||
update_grant_entry(ref, domid, frame,
|
||||
GTF_permit_access | (readonly ? GTF_readonly : 0));
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access_ref);
|
||||
|
||||
int gnttab_grant_foreign_access(domid_t domid, unsigned long frame,
|
||||
int readonly)
|
||||
{
|
||||
int ref;
|
||||
|
||||
ref = get_free_entries(1);
|
||||
if (unlikely(ref < 0))
|
||||
return -ENOSPC;
|
||||
|
||||
gnttab_grant_foreign_access_ref(ref, domid, frame, readonly);
|
||||
|
||||
return ref;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access);
|
||||
|
||||
int gnttab_query_foreign_access(grant_ref_t ref)
|
||||
{
|
||||
u16 nflags;
|
||||
|
||||
nflags = shared[ref].flags;
|
||||
|
||||
return (nflags & (GTF_reading|GTF_writing));
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(gnttab_query_foreign_access);
|
||||
|
||||
int gnttab_end_foreign_access_ref(grant_ref_t ref, int readonly)
|
||||
{
|
||||
u16 flags, nflags;
|
||||
|
||||
nflags = shared[ref].flags;
|
||||
do {
|
||||
flags = nflags;
|
||||
if (flags & (GTF_reading|GTF_writing)) {
|
||||
printk(KERN_ALERT "WARNING: g.e. still in use!\n");
|
||||
return 0;
|
||||
}
|
||||
} while ((nflags = sync_cmpxchg(&shared[ref].flags, flags, 0)) != flags);
|
||||
|
||||
return 1;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(gnttab_end_foreign_access_ref);
|
||||
|
||||
void gnttab_end_foreign_access(grant_ref_t ref, int readonly,
|
||||
unsigned long page)
|
||||
{
|
||||
if (gnttab_end_foreign_access_ref(ref, readonly)) {
|
||||
put_free_entry(ref);
|
||||
if (page != 0)
|
||||
free_page(page);
|
||||
} else {
|
||||
/* XXX This needs to be fixed so that the ref and page are
|
||||
placed on a list to be freed up later. */
|
||||
printk(KERN_WARNING
|
||||
"WARNING: leaking g.e. and page still in use!\n");
|
||||
}
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(gnttab_end_foreign_access);
|
||||
|
||||
int gnttab_grant_foreign_transfer(domid_t domid, unsigned long pfn)
|
||||
{
|
||||
int ref;
|
||||
|
||||
ref = get_free_entries(1);
|
||||
if (unlikely(ref < 0))
|
||||
return -ENOSPC;
|
||||
gnttab_grant_foreign_transfer_ref(ref, domid, pfn);
|
||||
|
||||
return ref;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(gnttab_grant_foreign_transfer);
|
||||
|
||||
void gnttab_grant_foreign_transfer_ref(grant_ref_t ref, domid_t domid,
|
||||
unsigned long pfn)
|
||||
{
|
||||
update_grant_entry(ref, domid, pfn, GTF_accept_transfer);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(gnttab_grant_foreign_transfer_ref);
|
||||
|
||||
unsigned long gnttab_end_foreign_transfer_ref(grant_ref_t ref)
|
||||
{
|
||||
unsigned long frame;
|
||||
u16 flags;
|
||||
|
||||
/*
|
||||
* If a transfer has not yet started, try to reclaim the grant
|
||||
* reference and return failure (== 0).
|
||||
*/
|
||||
while (!((flags = shared[ref].flags) & GTF_transfer_committed)) {
|
||||
if (sync_cmpxchg(&shared[ref].flags, flags, 0) == flags)
|
||||
return 0;
|
||||
cpu_relax();
|
||||
}
|
||||
|
||||
/* If a transfer is in progress then wait until it is completed. */
|
||||
while (!(flags & GTF_transfer_completed)) {
|
||||
flags = shared[ref].flags;
|
||||
cpu_relax();
|
||||
}
|
||||
|
||||
rmb(); /* Read the frame number /after/ reading completion status. */
|
||||
frame = shared[ref].frame;
|
||||
BUG_ON(frame == 0);
|
||||
|
||||
return frame;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(gnttab_end_foreign_transfer_ref);
|
||||
|
||||
unsigned long gnttab_end_foreign_transfer(grant_ref_t ref)
|
||||
{
|
||||
unsigned long frame = gnttab_end_foreign_transfer_ref(ref);
|
||||
put_free_entry(ref);
|
||||
return frame;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(gnttab_end_foreign_transfer);
|
||||
|
||||
void gnttab_free_grant_reference(grant_ref_t ref)
|
||||
{
|
||||
put_free_entry(ref);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(gnttab_free_grant_reference);
|
||||
|
||||
void gnttab_free_grant_references(grant_ref_t head)
|
||||
{
|
||||
grant_ref_t ref;
|
||||
unsigned long flags;
|
||||
int count = 1;
|
||||
if (head == GNTTAB_LIST_END)
|
||||
return;
|
||||
spin_lock_irqsave(&gnttab_list_lock, flags);
|
||||
ref = head;
|
||||
while (gnttab_entry(ref) != GNTTAB_LIST_END) {
|
||||
ref = gnttab_entry(ref);
|
||||
count++;
|
||||
}
|
||||
gnttab_entry(ref) = gnttab_free_head;
|
||||
gnttab_free_head = head;
|
||||
gnttab_free_count += count;
|
||||
check_free_callbacks();
|
||||
spin_unlock_irqrestore(&gnttab_list_lock, flags);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(gnttab_free_grant_references);
|
||||
|
||||
int gnttab_alloc_grant_references(u16 count, grant_ref_t *head)
|
||||
{
|
||||
int h = get_free_entries(count);
|
||||
|
||||
if (h < 0)
|
||||
return -ENOSPC;
|
||||
|
||||
*head = h;
|
||||
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(gnttab_alloc_grant_references);
|
||||
|
||||
int gnttab_empty_grant_references(const grant_ref_t *private_head)
|
||||
{
|
||||
return (*private_head == GNTTAB_LIST_END);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(gnttab_empty_grant_references);
|
||||
|
||||
int gnttab_claim_grant_reference(grant_ref_t *private_head)
|
||||
{
|
||||
grant_ref_t g = *private_head;
|
||||
if (unlikely(g == GNTTAB_LIST_END))
|
||||
return -ENOSPC;
|
||||
*private_head = gnttab_entry(g);
|
||||
return g;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(gnttab_claim_grant_reference);
|
||||
|
||||
void gnttab_release_grant_reference(grant_ref_t *private_head,
|
||||
grant_ref_t release)
|
||||
{
|
||||
gnttab_entry(release) = *private_head;
|
||||
*private_head = release;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(gnttab_release_grant_reference);
|
||||
|
||||
void gnttab_request_free_callback(struct gnttab_free_callback *callback,
|
||||
void (*fn)(void *), void *arg, u16 count)
|
||||
{
|
||||
unsigned long flags;
|
||||
spin_lock_irqsave(&gnttab_list_lock, flags);
|
||||
if (callback->next)
|
||||
goto out;
|
||||
callback->fn = fn;
|
||||
callback->arg = arg;
|
||||
callback->count = count;
|
||||
callback->next = gnttab_free_callback_list;
|
||||
gnttab_free_callback_list = callback;
|
||||
check_free_callbacks();
|
||||
out:
|
||||
spin_unlock_irqrestore(&gnttab_list_lock, flags);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(gnttab_request_free_callback);
|
||||
|
||||
void gnttab_cancel_free_callback(struct gnttab_free_callback *callback)
|
||||
{
|
||||
struct gnttab_free_callback **pcb;
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(&gnttab_list_lock, flags);
|
||||
for (pcb = &gnttab_free_callback_list; *pcb; pcb = &(*pcb)->next) {
|
||||
if (*pcb == callback) {
|
||||
*pcb = callback->next;
|
||||
break;
|
||||
}
|
||||
}
|
||||
spin_unlock_irqrestore(&gnttab_list_lock, flags);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(gnttab_cancel_free_callback);
|
||||
|
||||
static int grow_gnttab_list(unsigned int more_frames)
|
||||
{
|
||||
unsigned int new_nr_grant_frames, extra_entries, i;
|
||||
|
||||
new_nr_grant_frames = nr_grant_frames + more_frames;
|
||||
extra_entries = more_frames * GREFS_PER_GRANT_FRAME;
|
||||
|
||||
for (i = nr_grant_frames; i < new_nr_grant_frames; i++) {
|
||||
gnttab_list[i] = (grant_ref_t *)__get_free_page(GFP_ATOMIC);
|
||||
if (!gnttab_list[i])
|
||||
goto grow_nomem;
|
||||
}
|
||||
|
||||
|
||||
for (i = GREFS_PER_GRANT_FRAME * nr_grant_frames;
|
||||
i < GREFS_PER_GRANT_FRAME * new_nr_grant_frames - 1; i++)
|
||||
gnttab_entry(i) = i + 1;
|
||||
|
||||
gnttab_entry(i) = gnttab_free_head;
|
||||
gnttab_free_head = GREFS_PER_GRANT_FRAME * nr_grant_frames;
|
||||
gnttab_free_count += extra_entries;
|
||||
|
||||
nr_grant_frames = new_nr_grant_frames;
|
||||
|
||||
check_free_callbacks();
|
||||
|
||||
return 0;
|
||||
|
||||
grow_nomem:
|
||||
for ( ; i >= nr_grant_frames; i--)
|
||||
free_page((unsigned long) gnttab_list[i]);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
static unsigned int __max_nr_grant_frames(void)
|
||||
{
|
||||
struct gnttab_query_size query;
|
||||
int rc;
|
||||
|
||||
query.dom = DOMID_SELF;
|
||||
|
||||
	rc = HYPERVISOR_grant_table_op(GNTTABOP_query_size, &query, 1);
	if ((rc < 0) || (query.status != GNTST_okay))
		return 4; /* Legacy max supported number of frames */

	return query.max_nr_frames;
}

static inline unsigned int max_nr_grant_frames(void)
{
	unsigned int xen_max = __max_nr_grant_frames();

	if (xen_max > boot_max_nr_grant_frames)
		return boot_max_nr_grant_frames;
	return xen_max;
}

static int map_pte_fn(pte_t *pte, struct page *pmd_page,
		      unsigned long addr, void *data)
{
	unsigned long **frames = (unsigned long **)data;

	set_pte_at(&init_mm, addr, pte, mfn_pte((*frames)[0], PAGE_KERNEL));
	(*frames)++;
	return 0;
}

static int unmap_pte_fn(pte_t *pte, struct page *pmd_page,
			unsigned long addr, void *data)
{
	set_pte_at(&init_mm, addr, pte, __pte(0));
	return 0;
}

static int gnttab_map(unsigned int start_idx, unsigned int end_idx)
{
	struct gnttab_setup_table setup;
	unsigned long *frames;
	unsigned int nr_gframes = end_idx + 1;
	int rc;

	frames = kmalloc(nr_gframes * sizeof(unsigned long), GFP_ATOMIC);
	if (!frames)
		return -ENOMEM;

	setup.dom        = DOMID_SELF;
	setup.nr_frames  = nr_gframes;
	setup.frame_list = frames;

	rc = HYPERVISOR_grant_table_op(GNTTABOP_setup_table, &setup, 1);
	if (rc == -ENOSYS) {
		kfree(frames);
		return -ENOSYS;
	}

	BUG_ON(rc || setup.status);

	if (shared == NULL) {
		struct vm_struct *area;
		area = alloc_vm_area(PAGE_SIZE * max_nr_grant_frames());
		BUG_ON(area == NULL);
		shared = area->addr;
	}
	rc = apply_to_page_range(&init_mm, (unsigned long)shared,
				 PAGE_SIZE * nr_gframes,
				 map_pte_fn, &frames);
	BUG_ON(rc);
	frames -= nr_gframes; /* adjust after map_pte_fn() */

	kfree(frames);

	return 0;
}

static int gnttab_resume(void)
{
	if (max_nr_grant_frames() < nr_grant_frames)
		return -ENOSYS;
	return gnttab_map(0, nr_grant_frames - 1);
}

static int gnttab_suspend(void)
{
	apply_to_page_range(&init_mm, (unsigned long)shared,
			    PAGE_SIZE * nr_grant_frames,
			    unmap_pte_fn, NULL);

	return 0;
}

static int gnttab_expand(unsigned int req_entries)
{
	int rc;
	unsigned int cur, extra;

	cur = nr_grant_frames;
	extra = ((req_entries + (GREFS_PER_GRANT_FRAME-1)) /
		 GREFS_PER_GRANT_FRAME);
	if (cur + extra > max_nr_grant_frames())
		return -ENOSPC;

	rc = gnttab_map(cur, cur + extra - 1);
	if (rc == 0)
		rc = grow_gnttab_list(extra);

	return rc;
}

static int __devinit gnttab_init(void)
{
	int i;
	unsigned int max_nr_glist_frames;
	unsigned int nr_init_grefs;

	if (!is_running_on_xen())
		return -ENODEV;

	nr_grant_frames = 1;
	boot_max_nr_grant_frames = __max_nr_grant_frames();

	/* Determine the maximum number of frames required for the
	 * grant reference free list on the current hypervisor.
	 */
	max_nr_glist_frames = (boot_max_nr_grant_frames *
			       GREFS_PER_GRANT_FRAME /
			       (PAGE_SIZE / sizeof(grant_ref_t)));

	gnttab_list = kmalloc(max_nr_glist_frames * sizeof(grant_ref_t *),
			      GFP_KERNEL);
	if (gnttab_list == NULL)
		return -ENOMEM;

	for (i = 0; i < nr_grant_frames; i++) {
		gnttab_list[i] = (grant_ref_t *)__get_free_page(GFP_KERNEL);
		if (gnttab_list[i] == NULL)
			goto ini_nomem;
	}

	if (gnttab_resume() < 0)
		return -ENODEV;

	nr_init_grefs = nr_grant_frames * GREFS_PER_GRANT_FRAME;

	for (i = NR_RESERVED_ENTRIES; i < nr_init_grefs - 1; i++)
		gnttab_entry(i) = i + 1;

	gnttab_entry(nr_init_grefs - 1) = GNTTAB_LIST_END;
	gnttab_free_count = nr_init_grefs - NR_RESERVED_ENTRIES;
	gnttab_free_head  = NR_RESERVED_ENTRIES;

	printk("Grant table initialized\n");
	return 0;

 ini_nomem:
	for (i--; i >= 0; i--)
		free_page((unsigned long)gnttab_list[i]);
	kfree(gnttab_list);
	return -ENOMEM;
}

core_initcall(gnttab_init);
@@ -0,0 +1,7 @@
obj-y += xenbus.o

xenbus-objs =
xenbus-objs += xenbus_client.o
xenbus-objs += xenbus_comms.o
xenbus-objs += xenbus_xs.o
xenbus-objs += xenbus_probe.o
@ -0,0 +1,569 @@
|
|||
/******************************************************************************
|
||||
* Client-facing interface for the Xenbus driver. In other words, the
|
||||
* interface between the Xenbus and the device-specific code, be it the
|
||||
* frontend or the backend of that driver.
|
||||
*
|
||||
* Copyright (C) 2005 XenSource Ltd
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License version 2
|
||||
* as published by the Free Software Foundation; or, when distributed
|
||||
* separately from the Linux kernel or incorporated into other
|
||||
* software packages, subject to the following license:
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this source file (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use, copy, modify,
|
||||
* merge, publish, distribute, sublicense, and/or sell copies of the Software,
|
||||
* and to permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <linux/types.h>
|
||||
#include <linux/vmalloc.h>
|
||||
#include <asm/xen/hypervisor.h>
|
||||
#include <xen/interface/xen.h>
|
||||
#include <xen/interface/event_channel.h>
|
||||
#include <xen/events.h>
|
||||
#include <xen/grant_table.h>
|
||||
#include <xen/xenbus.h>
|
||||
|
||||
const char *xenbus_strstate(enum xenbus_state state)
|
||||
{
|
||||
static const char *const name[] = {
|
||||
[ XenbusStateUnknown ] = "Unknown",
|
||||
[ XenbusStateInitialising ] = "Initialising",
|
||||
[ XenbusStateInitWait ] = "InitWait",
|
||||
[ XenbusStateInitialised ] = "Initialised",
|
||||
[ XenbusStateConnected ] = "Connected",
|
||||
[ XenbusStateClosing ] = "Closing",
|
||||
[ XenbusStateClosed ] = "Closed",
|
||||
};
|
||||
return (state < ARRAY_SIZE(name)) ? name[state] : "INVALID";
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(xenbus_strstate);
|
||||
|
||||
/**
 * xenbus_watch_path - register a watch
 * @dev: xenbus device
 * @path: path to watch
 * @watch: watch to register
 * @callback: callback to register
 *
 * Register a @watch on the given path, using the given xenbus_watch structure
 * for storage, and the given @callback function as the callback.  Return 0 on
 * success, or -errno on error.  On success, the given @path will be saved as
 * @watch->node, and remains the caller's to free.  On error, @watch->node will
 * be NULL, the device will switch to %XenbusStateClosing, and the error will
 * be saved in the store.
 */
int xenbus_watch_path(struct xenbus_device *dev, const char *path,
		      struct xenbus_watch *watch,
		      void (*callback)(struct xenbus_watch *,
				       const char **, unsigned int))
{
	int err;

	watch->node = path;
	watch->callback = callback;

	err = register_xenbus_watch(watch);

	if (err) {
		watch->node = NULL;
		watch->callback = NULL;
		xenbus_dev_fatal(dev, err, "adding watch on %s", path);
	}

	return err;
}
EXPORT_SYMBOL_GPL(xenbus_watch_path);
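A hypothetical caller of the helper above might look like the sketch below; the watch structure, callback and function names are invented for illustration. The path handed to xenbus_watch_path() is stored in watch->node, so it must stay valid for as long as the watch is registered (here the device's own otherend path, which xenbus keeps around while the device is bound).

static struct xenbus_watch my_backend_watch;

/* Invoked from the xenbus watch machinery whenever the watched node changes. */
static void my_backend_changed(struct xenbus_watch *watch,
			       const char **vec, unsigned int len)
{
	/* vec[XS_WATCH_PATH] names the node that fired; re-read it here. */
	pr_debug("xenstore node %s changed\n", vec[XS_WATCH_PATH]);
}

static int my_watch_backend(struct xenbus_device *dev)
{
	/* Watch the backend's directory; watches fire for child nodes too. */
	return xenbus_watch_path(dev, dev->otherend, &my_backend_watch,
				 my_backend_changed);
}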
|
||||
|
||||
|
||||
/**
|
||||
* xenbus_watch_pathfmt - register a watch on a sprintf-formatted path
|
||||
* @dev: xenbus device
|
||||
* @watch: watch to register
|
||||
* @callback: callback to register
|
||||
* @pathfmt: format of path to watch
|
||||
*
|
||||
* Register a watch on the given @path, using the given xenbus_watch
|
||||
* structure for storage, and the given @callback function as the callback.
|
||||
* Return 0 on success, or -errno on error. On success, the watched path
|
||||
* (@path/@path2) will be saved as @watch->node, and becomes the caller's to
|
||||
* kfree(). On error, watch->node will be NULL, so the caller has nothing to
|
||||
* free, the device will switch to %XenbusStateClosing, and the error will be
|
||||
* saved in the store.
|
||||
*/
|
||||
int xenbus_watch_pathfmt(struct xenbus_device *dev,
|
||||
struct xenbus_watch *watch,
|
||||
void (*callback)(struct xenbus_watch *,
|
||||
const char **, unsigned int),
|
||||
const char *pathfmt, ...)
|
||||
{
|
||||
int err;
|
||||
va_list ap;
|
||||
char *path;
|
||||
|
||||
va_start(ap, pathfmt);
|
||||
path = kvasprintf(GFP_KERNEL, pathfmt, ap);
|
||||
va_end(ap);
|
||||
|
||||
if (!path) {
|
||||
xenbus_dev_fatal(dev, -ENOMEM, "allocating path for watch");
|
||||
return -ENOMEM;
|
||||
}
|
||||
err = xenbus_watch_path(dev, path, watch, callback);
|
||||
|
||||
if (err)
|
||||
kfree(path);
|
||||
return err;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(xenbus_watch_pathfmt);
|
||||
|
||||
|
||||
/**
 * xenbus_switch_state
 * @dev: xenbus device
 * @state: new state
 *
 * Advertise in the store a change of the given driver to the given @state.
 * Return 0 on success, or -errno on error.  On error, the device will switch
 * to XenbusStateClosing, and the error will be saved in the store.
 */
int xenbus_switch_state(struct xenbus_device *dev, enum xenbus_state state)
{
	/* We check whether the state is currently set to the given value, and
	   if not, then the state is set.  We don't want to unconditionally
	   write the given state, because we don't want to fire watches
	   unnecessarily.  Furthermore, if the node has gone, we don't write
	   to it, as the device will be tearing down, and we don't want to
	   resurrect that directory.

	   Note that, because of this cached value of our state, this function
	   will not work inside a Xenstore transaction (something it tried to
	   do in the past) because dev->state would not get reset if the
	   transaction was aborted.
	 */

	int current_state;
	int err;

	if (state == dev->state)
		return 0;

	err = xenbus_scanf(XBT_NIL, dev->nodename, "state", "%d",
			   &current_state);
	if (err != 1)
		return 0;

	err = xenbus_printf(XBT_NIL, dev->nodename, "state", "%d", state);
	if (err) {
		if (state != XenbusStateClosing) /* Avoid looping */
			xenbus_dev_fatal(dev, err, "writing new state");
		return err;
	}

	dev->state = state;

	return 0;
}
EXPORT_SYMBOL_GPL(xenbus_switch_state);
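As a rough sketch of how a frontend might use this helper while connecting (hypothetical, not part of the patch): ring_ref and evtchn are assumed to come from xenbus_grant_ring() and xenbus_alloc_evtchn(), and the "ring-ref"/"event-channel" node names follow the usual frontend convention rather than anything mandated by this file.

static int my_publish_ring(struct xenbus_device *dev, int ring_ref, int evtchn)
{
	int err;

	/* Publish the shared-ring details where the backend expects them. */
	err = xenbus_printf(XBT_NIL, dev->nodename, "ring-ref", "%d", ring_ref);
	if (err)
		return err;

	err = xenbus_printf(XBT_NIL, dev->nodename, "event-channel", "%d", evtchn);
	if (err)
		return err;

	/* Tell the backend we are ready; it will move towards Connected. */
	return xenbus_switch_state(dev, XenbusStateInitialised);
}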
|
||||
|
||||
int xenbus_frontend_closed(struct xenbus_device *dev)
|
||||
{
|
||||
xenbus_switch_state(dev, XenbusStateClosed);
|
||||
complete(&dev->down);
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(xenbus_frontend_closed);
|
||||
|
||||
/**
|
||||
* Return the path to the error node for the given device, or NULL on failure.
|
||||
* If the value returned is non-NULL, then it is the caller's to kfree.
|
||||
*/
|
||||
static char *error_path(struct xenbus_device *dev)
|
||||
{
|
||||
return kasprintf(GFP_KERNEL, "error/%s", dev->nodename);
|
||||
}
|
||||
|
||||
|
||||
static void xenbus_va_dev_error(struct xenbus_device *dev, int err,
|
||||
const char *fmt, va_list ap)
|
||||
{
|
||||
int ret;
|
||||
unsigned int len;
|
||||
char *printf_buffer = NULL;
|
||||
char *path_buffer = NULL;
|
||||
|
||||
#define PRINTF_BUFFER_SIZE 4096
|
||||
printf_buffer = kmalloc(PRINTF_BUFFER_SIZE, GFP_KERNEL);
|
||||
if (printf_buffer == NULL)
|
||||
goto fail;
|
||||
|
||||
len = sprintf(printf_buffer, "%i ", -err);
|
||||
ret = vsnprintf(printf_buffer+len, PRINTF_BUFFER_SIZE-len, fmt, ap);
|
||||
|
||||
BUG_ON(len + ret > PRINTF_BUFFER_SIZE-1);
|
||||
|
||||
dev_err(&dev->dev, "%s\n", printf_buffer);
|
||||
|
||||
path_buffer = error_path(dev);
|
||||
|
||||
if (path_buffer == NULL) {
|
||||
dev_err(&dev->dev, "failed to write error node for %s (%s)\n",
|
||||
dev->nodename, printf_buffer);
|
||||
goto fail;
|
||||
}
|
||||
|
||||
if (xenbus_write(XBT_NIL, path_buffer, "error", printf_buffer) != 0) {
|
||||
dev_err(&dev->dev, "failed to write error node for %s (%s)\n",
|
||||
dev->nodename, printf_buffer);
|
||||
goto fail;
|
||||
}
|
||||
|
||||
fail:
|
||||
kfree(printf_buffer);
|
||||
kfree(path_buffer);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* xenbus_dev_error
|
||||
* @dev: xenbus device
|
||||
* @err: error to report
|
||||
* @fmt: error message format
|
||||
*
|
||||
* Report the given negative errno into the store, along with the given
|
||||
* formatted message.
|
||||
*/
|
||||
void xenbus_dev_error(struct xenbus_device *dev, int err, const char *fmt, ...)
|
||||
{
|
||||
va_list ap;
|
||||
|
||||
va_start(ap, fmt);
|
||||
xenbus_va_dev_error(dev, err, fmt, ap);
|
||||
va_end(ap);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(xenbus_dev_error);
|
||||
|
||||
/**
|
||||
* xenbus_dev_fatal
|
||||
* @dev: xenbus device
|
||||
* @err: error to report
|
||||
* @fmt: error message format
|
||||
*
|
||||
* Equivalent to xenbus_dev_error(dev, err, fmt, args), followed by
|
||||
* xenbus_switch_state(dev, NULL, XenbusStateClosing) to schedule an orderly
|
||||
* closedown of this driver and its peer.
|
||||
*/
|
||||
|
||||
void xenbus_dev_fatal(struct xenbus_device *dev, int err, const char *fmt, ...)
|
||||
{
|
||||
va_list ap;
|
||||
|
||||
va_start(ap, fmt);
|
||||
xenbus_va_dev_error(dev, err, fmt, ap);
|
||||
va_end(ap);
|
||||
|
||||
xenbus_switch_state(dev, XenbusStateClosing);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(xenbus_dev_fatal);
|
||||
|
||||
/**
|
||||
* xenbus_grant_ring
|
||||
* @dev: xenbus device
|
||||
* @ring_mfn: mfn of ring to grant
|
||||
|
||||
* Grant access to the given @ring_mfn to the peer of the given device. Return
|
||||
* 0 on success, or -errno on error. On error, the device will switch to
|
||||
* XenbusStateClosing, and the error will be saved in the store.
|
||||
*/
|
||||
int xenbus_grant_ring(struct xenbus_device *dev, unsigned long ring_mfn)
|
||||
{
|
||||
int err = gnttab_grant_foreign_access(dev->otherend_id, ring_mfn, 0);
|
||||
if (err < 0)
|
||||
xenbus_dev_fatal(dev, err, "granting access to ring page");
|
||||
return err;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(xenbus_grant_ring);
|
||||
|
||||
|
||||
/**
|
||||
* Allocate an event channel for the given xenbus_device, assigning the newly
|
||||
* created local port to *port. Return 0 on success, or -errno on error. On
|
||||
* error, the device will switch to XenbusStateClosing, and the error will be
|
||||
* saved in the store.
|
||||
*/
|
||||
int xenbus_alloc_evtchn(struct xenbus_device *dev, int *port)
|
||||
{
|
||||
struct evtchn_alloc_unbound alloc_unbound;
|
||||
int err;
|
||||
|
||||
alloc_unbound.dom = DOMID_SELF;
|
||||
alloc_unbound.remote_dom = dev->otherend_id;
|
||||
|
||||
err = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound,
|
||||
&alloc_unbound);
|
||||
if (err)
|
||||
xenbus_dev_fatal(dev, err, "allocating event channel");
|
||||
else
|
||||
*port = alloc_unbound.port;
|
||||
|
||||
return err;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(xenbus_alloc_evtchn);
|
||||
|
||||
|
||||
/**
|
||||
* Bind to an existing interdomain event channel in another domain. Returns 0
|
||||
* on success and stores the local port in *port. On error, returns -errno,
|
||||
* switches the device to XenbusStateClosing, and saves the error in XenStore.
|
||||
*/
|
||||
int xenbus_bind_evtchn(struct xenbus_device *dev, int remote_port, int *port)
|
||||
{
|
||||
struct evtchn_bind_interdomain bind_interdomain;
|
||||
int err;
|
||||
|
||||
bind_interdomain.remote_dom = dev->otherend_id;
|
||||
bind_interdomain.remote_port = remote_port;
|
||||
|
||||
err = HYPERVISOR_event_channel_op(EVTCHNOP_bind_interdomain,
|
||||
&bind_interdomain);
|
||||
if (err)
|
||||
xenbus_dev_fatal(dev, err,
|
||||
"binding to event channel %d from domain %d",
|
||||
remote_port, dev->otherend_id);
|
||||
else
|
||||
*port = bind_interdomain.local_port;
|
||||
|
||||
return err;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(xenbus_bind_evtchn);
|
||||
|
||||
|
||||
/**
|
||||
* Free an existing event channel. Returns 0 on success or -errno on error.
|
||||
*/
|
||||
int xenbus_free_evtchn(struct xenbus_device *dev, int port)
|
||||
{
|
||||
struct evtchn_close close;
|
||||
int err;
|
||||
|
||||
close.port = port;
|
||||
|
||||
err = HYPERVISOR_event_channel_op(EVTCHNOP_close, &close);
|
||||
if (err)
|
||||
xenbus_dev_error(dev, err, "freeing event channel %d", port);
|
||||
|
||||
return err;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(xenbus_free_evtchn);
|
||||
|
||||
|
||||
/**
 * xenbus_map_ring_valloc
 * @dev: xenbus device
 * @gnt_ref: grant reference
 * @vaddr: pointer to address to be filled out by mapping
 *
 * Based on Rusty Russell's skeleton driver's map_page.
 * Map a page of memory into this domain from another domain's grant table.
 * xenbus_map_ring_valloc allocates a page of virtual address space, maps the
 * page to that address, and sets *vaddr to that address.
 * Returns 0 on success, and GNTST_* (see xen/include/interface/grant_table.h)
 * or -ENOMEM on error.  If an error is returned, device will switch to
 * XenbusStateClosing and the error message will be saved in XenStore.
 */
int xenbus_map_ring_valloc(struct xenbus_device *dev, int gnt_ref, void **vaddr)
{
	struct gnttab_map_grant_ref op = {
		.flags = GNTMAP_host_map,
		.ref   = gnt_ref,
		.dom   = dev->otherend_id,
	};
	struct vm_struct *area;

	*vaddr = NULL;

	area = alloc_vm_area(PAGE_SIZE);
	if (!area)
		return -ENOMEM;

	op.host_addr = (unsigned long)area->addr;

	if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1))
		BUG();

	if (op.status != GNTST_okay) {
		free_vm_area(area);
		xenbus_dev_fatal(dev, op.status,
				 "mapping in shared page %d from domain %d",
				 gnt_ref, dev->otherend_id);
		return op.status;
	}

	/* Stuff the handle in an unused field */
	area->phys_addr = (unsigned long)op.handle;

	*vaddr = area->addr;
	return 0;
}
EXPORT_SYMBOL_GPL(xenbus_map_ring_valloc);
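For the consuming side, a hypothetical backend-style user of this helper and its counterpart xenbus_unmap_ring_vfree() might look like the following sketch (names invented; the grant reference is assumed to have been read from the frontend's xenstore directory):

static void *my_ring;

static int my_connect_ring(struct xenbus_device *dev, int ring_ref)
{
	int err = xenbus_map_ring_valloc(dev, ring_ref, &my_ring);
	if (err)
		return err;	/* error already reported via xenbus_dev_fatal */

	/* ... initialise the shared ring structures at 'my_ring' ... */
	return 0;
}

static void my_disconnect_ring(struct xenbus_device *dev)
{
	if (my_ring) {
		xenbus_unmap_ring_vfree(dev, my_ring);
		my_ring = NULL;
	}
}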
|
||||
|
||||
|
||||
/**
|
||||
* xenbus_map_ring
|
||||
* @dev: xenbus device
|
||||
* @gnt_ref: grant reference
|
||||
* @handle: pointer to grant handle to be filled
|
||||
* @vaddr: address to be mapped to
|
||||
*
|
||||
* Map a page of memory into this domain from another domain's grant table.
|
||||
* xenbus_map_ring does not allocate the virtual address space (you must do
|
||||
* this yourself!). It only maps in the page to the specified address.
|
||||
* Returns 0 on success, and GNTST_* (see xen/include/interface/grant_table.h)
|
||||
* or -ENOMEM on error. If an error is returned, device will switch to
|
||||
* XenbusStateClosing and the error message will be saved in XenStore.
|
||||
*/
|
||||
int xenbus_map_ring(struct xenbus_device *dev, int gnt_ref,
|
||||
grant_handle_t *handle, void *vaddr)
|
||||
{
|
||||
struct gnttab_map_grant_ref op = {
|
||||
.host_addr = (unsigned long)vaddr,
|
||||
.flags = GNTMAP_host_map,
|
||||
.ref = gnt_ref,
|
||||
.dom = dev->otherend_id,
|
||||
};
|
||||
|
||||
if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1))
|
||||
BUG();
|
||||
|
||||
if (op.status != GNTST_okay) {
|
||||
xenbus_dev_fatal(dev, op.status,
|
||||
"mapping in shared page %d from domain %d",
|
||||
gnt_ref, dev->otherend_id);
|
||||
} else
|
||||
*handle = op.handle;
|
||||
|
||||
return op.status;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(xenbus_map_ring);
|
||||
|
||||
|
||||
/**
|
||||
* xenbus_unmap_ring_vfree
|
||||
* @dev: xenbus device
|
||||
* @vaddr: addr to unmap
|
||||
*
|
||||
* Based on Rusty Russell's skeleton driver's unmap_page.
|
||||
* Unmap a page of memory in this domain that was imported from another domain.
|
||||
* Use xenbus_unmap_ring_vfree if you mapped in your memory with
|
||||
* xenbus_map_ring_valloc (it will free the virtual address space).
|
||||
* Returns 0 on success and returns GNTST_* on error
|
||||
* (see xen/include/interface/grant_table.h).
|
||||
*/
|
||||
int xenbus_unmap_ring_vfree(struct xenbus_device *dev, void *vaddr)
|
||||
{
|
||||
struct vm_struct *area;
|
||||
struct gnttab_unmap_grant_ref op = {
|
||||
.host_addr = (unsigned long)vaddr,
|
||||
};
|
||||
|
||||
/* It'd be nice if linux/vmalloc.h provided a find_vm_area(void *addr)
|
||||
* method so that we don't have to muck with vmalloc internals here.
|
||||
* We could force the user to hang on to their struct vm_struct from
|
||||
* xenbus_map_ring_valloc, but these 6 lines considerably simplify
|
||||
* this API.
|
||||
*/
|
||||
read_lock(&vmlist_lock);
|
||||
for (area = vmlist; area != NULL; area = area->next) {
|
||||
if (area->addr == vaddr)
|
||||
break;
|
||||
}
|
||||
read_unlock(&vmlist_lock);
|
||||
|
||||
if (!area) {
|
||||
xenbus_dev_error(dev, -ENOENT,
|
||||
"can't find mapped virtual address %p", vaddr);
|
||||
return GNTST_bad_virt_addr;
|
||||
}
|
||||
|
||||
op.handle = (grant_handle_t)area->phys_addr;
|
||||
|
||||
if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1))
|
||||
BUG();
|
||||
|
||||
if (op.status == GNTST_okay)
|
||||
free_vm_area(area);
|
||||
else
|
||||
xenbus_dev_error(dev, op.status,
|
||||
"unmapping page at handle %d error %d",
|
||||
(int16_t)area->phys_addr, op.status);
|
||||
|
||||
return op.status;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(xenbus_unmap_ring_vfree);
|
||||
|
||||
|
||||
/**
|
||||
* xenbus_unmap_ring
|
||||
* @dev: xenbus device
|
||||
* @handle: grant handle
|
||||
* @vaddr: addr to unmap
|
||||
*
|
||||
* Unmap a page of memory in this domain that was imported from another domain.
|
||||
* Returns 0 on success and returns GNTST_* on error
|
||||
* (see xen/include/interface/grant_table.h).
|
||||
*/
|
||||
int xenbus_unmap_ring(struct xenbus_device *dev,
|
||||
grant_handle_t handle, void *vaddr)
|
||||
{
|
||||
struct gnttab_unmap_grant_ref op = {
|
||||
.host_addr = (unsigned long)vaddr,
|
||||
.handle = handle,
|
||||
};
|
||||
|
||||
if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1))
|
||||
BUG();
|
||||
|
||||
if (op.status != GNTST_okay)
|
||||
xenbus_dev_error(dev, op.status,
|
||||
"unmapping page at handle %d error %d",
|
||||
handle, op.status);
|
||||
|
||||
return op.status;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(xenbus_unmap_ring);
|
||||
|
||||
|
||||
/**
|
||||
* xenbus_read_driver_state
|
||||
* @path: path for driver
|
||||
*
|
||||
* Return the state of the driver rooted at the given store path, or
|
||||
* XenbusStateUnknown if no state can be read.
|
||||
*/
|
||||
enum xenbus_state xenbus_read_driver_state(const char *path)
|
||||
{
|
||||
enum xenbus_state result;
|
||||
int err = xenbus_gather(XBT_NIL, path, "state", "%d", &result, NULL);
|
||||
if (err)
|
||||
result = XenbusStateUnknown;
|
||||
|
||||
return result;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(xenbus_read_driver_state);
|
|
@ -0,0 +1,233 @@
|
|||
/******************************************************************************
|
||||
* xenbus_comms.c
|
||||
*
|
||||
* Low level code to talks to Xen Store: ringbuffer and event channel.
|
||||
*
|
||||
* Copyright (C) 2005 Rusty Russell, IBM Corporation
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License version 2
|
||||
* as published by the Free Software Foundation; or, when distributed
|
||||
* separately from the Linux kernel or incorporated into other
|
||||
* software packages, subject to the following license:
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this source file (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use, copy, modify,
|
||||
* merge, publish, distribute, sublicense, and/or sell copies of the Software,
|
||||
* and to permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <linux/wait.h>
|
||||
#include <linux/interrupt.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/err.h>
|
||||
#include <xen/xenbus.h>
|
||||
#include <asm/xen/hypervisor.h>
|
||||
#include <xen/events.h>
|
||||
#include <xen/page.h>
|
||||
#include "xenbus_comms.h"
|
||||
|
||||
static int xenbus_irq;
|
||||
|
||||
static DECLARE_WORK(probe_work, xenbus_probe);
|
||||
|
||||
static DECLARE_WAIT_QUEUE_HEAD(xb_waitq);
|
||||
|
||||
static irqreturn_t wake_waiting(int irq, void *unused)
|
||||
{
|
||||
if (unlikely(xenstored_ready == 0)) {
|
||||
xenstored_ready = 1;
|
||||
schedule_work(&probe_work);
|
||||
}
|
||||
|
||||
wake_up(&xb_waitq);
|
||||
return IRQ_HANDLED;
|
||||
}
|
||||
|
||||
static int check_indexes(XENSTORE_RING_IDX cons, XENSTORE_RING_IDX prod)
|
||||
{
|
||||
return ((prod - cons) <= XENSTORE_RING_SIZE);
|
||||
}
|
||||
|
||||
static void *get_output_chunk(XENSTORE_RING_IDX cons,
|
||||
XENSTORE_RING_IDX prod,
|
||||
char *buf, uint32_t *len)
|
||||
{
|
||||
*len = XENSTORE_RING_SIZE - MASK_XENSTORE_IDX(prod);
|
||||
if ((XENSTORE_RING_SIZE - (prod - cons)) < *len)
|
||||
*len = XENSTORE_RING_SIZE - (prod - cons);
|
||||
return buf + MASK_XENSTORE_IDX(prod);
|
||||
}
|
||||
|
||||
static const void *get_input_chunk(XENSTORE_RING_IDX cons,
|
||||
XENSTORE_RING_IDX prod,
|
||||
const char *buf, uint32_t *len)
|
||||
{
|
||||
*len = XENSTORE_RING_SIZE - MASK_XENSTORE_IDX(cons);
|
||||
if ((prod - cons) < *len)
|
||||
*len = prod - cons;
|
||||
return buf + MASK_XENSTORE_IDX(cons);
|
||||
}
|
||||
|
||||
/**
|
||||
* xb_write - low level write
|
||||
* @data: buffer to send
|
||||
* @len: length of buffer
|
||||
*
|
||||
* Returns 0 on success, error otherwise.
|
||||
*/
|
||||
int xb_write(const void *data, unsigned len)
|
||||
{
|
||||
struct xenstore_domain_interface *intf = xen_store_interface;
|
||||
XENSTORE_RING_IDX cons, prod;
|
||||
int rc;
|
||||
|
||||
while (len != 0) {
|
||||
void *dst;
|
||||
unsigned int avail;
|
||||
|
||||
rc = wait_event_interruptible(
|
||||
xb_waitq,
|
||||
(intf->req_prod - intf->req_cons) !=
|
||||
XENSTORE_RING_SIZE);
|
||||
if (rc < 0)
|
||||
return rc;
|
||||
|
||||
/* Read indexes, then verify. */
|
||||
cons = intf->req_cons;
|
||||
prod = intf->req_prod;
|
||||
if (!check_indexes(cons, prod)) {
|
||||
intf->req_cons = intf->req_prod = 0;
|
||||
return -EIO;
|
||||
}
|
||||
|
||||
dst = get_output_chunk(cons, prod, intf->req, &avail);
|
||||
if (avail == 0)
|
||||
continue;
|
||||
if (avail > len)
|
||||
avail = len;
|
||||
|
||||
/* Must write data /after/ reading the consumer index. */
|
||||
mb();
|
||||
|
||||
memcpy(dst, data, avail);
|
||||
data += avail;
|
||||
len -= avail;
|
||||
|
||||
/* Other side must not see new producer until data is there. */
|
||||
wmb();
|
||||
intf->req_prod += avail;
|
||||
|
||||
/* Implies mb(): other side will see the updated producer. */
|
||||
notify_remote_via_evtchn(xen_store_evtchn);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int xb_data_to_read(void)
|
||||
{
|
||||
struct xenstore_domain_interface *intf = xen_store_interface;
|
||||
return (intf->rsp_cons != intf->rsp_prod);
|
||||
}
|
||||
|
||||
int xb_wait_for_data_to_read(void)
|
||||
{
|
||||
return wait_event_interruptible(xb_waitq, xb_data_to_read());
|
||||
}
|
||||
|
||||
int xb_read(void *data, unsigned len)
|
||||
{
|
||||
struct xenstore_domain_interface *intf = xen_store_interface;
|
||||
XENSTORE_RING_IDX cons, prod;
|
||||
int rc;
|
||||
|
||||
while (len != 0) {
|
||||
unsigned int avail;
|
||||
const char *src;
|
||||
|
||||
rc = xb_wait_for_data_to_read();
|
||||
if (rc < 0)
|
||||
return rc;
|
||||
|
||||
/* Read indexes, then verify. */
|
||||
cons = intf->rsp_cons;
|
||||
prod = intf->rsp_prod;
|
||||
if (!check_indexes(cons, prod)) {
|
||||
intf->rsp_cons = intf->rsp_prod = 0;
|
||||
return -EIO;
|
||||
}
|
||||
|
||||
src = get_input_chunk(cons, prod, intf->rsp, &avail);
|
||||
if (avail == 0)
|
||||
continue;
|
||||
if (avail > len)
|
||||
avail = len;
|
||||
|
||||
/* Must read data /after/ reading the producer index. */
|
||||
rmb();
|
||||
|
||||
memcpy(data, src, avail);
|
||||
data += avail;
|
||||
len -= avail;
|
||||
|
||||
/* Other side must not see free space until we've copied out */
|
||||
mb();
|
||||
intf->rsp_cons += avail;
|
||||
|
||||
pr_debug("Finished read of %i bytes (%i to go)\n", avail, len);
|
||||
|
||||
/* Implies mb(): other side will see the updated consumer. */
|
||||
notify_remote_via_evtchn(xen_store_evtchn);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* xb_init_comms - Set up interrupt handler off store event channel.
|
||||
*/
|
||||
int xb_init_comms(void)
|
||||
{
|
||||
struct xenstore_domain_interface *intf = xen_store_interface;
|
||||
int err;
|
||||
|
||||
if (intf->req_prod != intf->req_cons)
|
||||
printk(KERN_ERR "XENBUS request ring is not quiescent "
|
||||
"(%08x:%08x)!\n", intf->req_cons, intf->req_prod);
|
||||
|
||||
if (intf->rsp_prod != intf->rsp_cons) {
|
||||
printk(KERN_WARNING "XENBUS response ring is not quiescent "
|
||||
"(%08x:%08x): fixing up\n",
|
||||
intf->rsp_cons, intf->rsp_prod);
|
||||
intf->rsp_cons = intf->rsp_prod;
|
||||
}
|
||||
|
||||
if (xenbus_irq)
|
||||
unbind_from_irqhandler(xenbus_irq, &xb_waitq);
|
||||
|
||||
err = bind_evtchn_to_irqhandler(
|
||||
xen_store_evtchn, wake_waiting,
|
||||
0, "xenbus", &xb_waitq);
|
||||
if (err <= 0) {
|
||||
printk(KERN_ERR "XENBUS request irq failed %i\n", err);
|
||||
return err;
|
||||
}
|
||||
|
||||
xenbus_irq = err;
|
||||
|
||||
return 0;
|
||||
}
|
|
@ -0,0 +1,46 @@
|
|||
/*
|
||||
* Private include for xenbus communications.
|
||||
*
|
||||
* Copyright (C) 2005 Rusty Russell, IBM Corporation
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License version 2
|
||||
* as published by the Free Software Foundation; or, when distributed
|
||||
* separately from the Linux kernel or incorporated into other
|
||||
* software packages, subject to the following license:
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this source file (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use, copy, modify,
|
||||
* merge, publish, distribute, sublicense, and/or sell copies of the Software,
|
||||
* and to permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef _XENBUS_COMMS_H
|
||||
#define _XENBUS_COMMS_H
|
||||
|
||||
int xs_init(void);
|
||||
int xb_init_comms(void);
|
||||
|
||||
/* Low level routines. */
|
||||
int xb_write(const void *data, unsigned len);
|
||||
int xb_read(void *data, unsigned len);
|
||||
int xb_data_to_read(void);
|
||||
int xb_wait_for_data_to_read(void);
|
||||
int xs_input_avail(void);
|
||||
extern struct xenstore_domain_interface *xen_store_interface;
|
||||
extern int xen_store_evtchn;
|
||||
|
||||
#endif /* _XENBUS_COMMS_H */
|
|
@ -0,0 +1,935 @@
|
|||
/******************************************************************************
|
||||
* Talks to Xen Store to figure out what devices we have.
|
||||
*
|
||||
* Copyright (C) 2005 Rusty Russell, IBM Corporation
|
||||
* Copyright (C) 2005 Mike Wray, Hewlett-Packard
|
||||
* Copyright (C) 2005, 2006 XenSource Ltd
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License version 2
|
||||
* as published by the Free Software Foundation; or, when distributed
|
||||
* separately from the Linux kernel or incorporated into other
|
||||
* software packages, subject to the following license:
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this source file (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use, copy, modify,
|
||||
* merge, publish, distribute, sublicense, and/or sell copies of the Software,
|
||||
* and to permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#define DPRINTK(fmt, args...) \
|
||||
pr_debug("xenbus_probe (%s:%d) " fmt ".\n", \
|
||||
__func__, __LINE__, ##args)
|
||||
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/err.h>
|
||||
#include <linux/string.h>
|
||||
#include <linux/ctype.h>
|
||||
#include <linux/fcntl.h>
|
||||
#include <linux/mm.h>
|
||||
#include <linux/notifier.h>
|
||||
#include <linux/kthread.h>
|
||||
#include <linux/mutex.h>
|
||||
#include <linux/io.h>
|
||||
|
||||
#include <asm/page.h>
|
||||
#include <asm/pgtable.h>
|
||||
#include <asm/xen/hypervisor.h>
|
||||
#include <xen/xenbus.h>
|
||||
#include <xen/events.h>
|
||||
#include <xen/page.h>
|
||||
|
||||
#include "xenbus_comms.h"
|
||||
#include "xenbus_probe.h"
|
||||
|
||||
int xen_store_evtchn;
|
||||
struct xenstore_domain_interface *xen_store_interface;
|
||||
static unsigned long xen_store_mfn;
|
||||
|
||||
static BLOCKING_NOTIFIER_HEAD(xenstore_chain);
|
||||
|
||||
static void wait_for_devices(struct xenbus_driver *xendrv);
|
||||
|
||||
static int xenbus_probe_frontend(const char *type, const char *name);
|
||||
|
||||
static void xenbus_dev_shutdown(struct device *_dev);
|
||||
|
||||
/* If something in array of ids matches this device, return it. */
|
||||
static const struct xenbus_device_id *
|
||||
match_device(const struct xenbus_device_id *arr, struct xenbus_device *dev)
|
||||
{
|
||||
for (; *arr->devicetype != '\0'; arr++) {
|
||||
if (!strcmp(arr->devicetype, dev->devicetype))
|
||||
return arr;
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
int xenbus_match(struct device *_dev, struct device_driver *_drv)
|
||||
{
|
||||
struct xenbus_driver *drv = to_xenbus_driver(_drv);
|
||||
|
||||
if (!drv->ids)
|
||||
return 0;
|
||||
|
||||
return match_device(drv->ids, to_xenbus_device(_dev)) != NULL;
|
||||
}
|
||||
|
||||
/* device/<type>/<id> => <type>-<id> */
|
||||
static int frontend_bus_id(char bus_id[BUS_ID_SIZE], const char *nodename)
|
||||
{
|
||||
nodename = strchr(nodename, '/');
|
||||
if (!nodename || strlen(nodename + 1) >= BUS_ID_SIZE) {
|
||||
printk(KERN_WARNING "XENBUS: bad frontend %s\n", nodename);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
strlcpy(bus_id, nodename + 1, BUS_ID_SIZE);
|
||||
if (!strchr(bus_id, '/')) {
|
||||
printk(KERN_WARNING "XENBUS: bus_id %s no slash\n", bus_id);
|
||||
return -EINVAL;
|
||||
}
|
||||
*strchr(bus_id, '/') = '-';
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
static void free_otherend_details(struct xenbus_device *dev)
|
||||
{
|
||||
kfree(dev->otherend);
|
||||
dev->otherend = NULL;
|
||||
}
|
||||
|
||||
|
||||
static void free_otherend_watch(struct xenbus_device *dev)
|
||||
{
|
||||
if (dev->otherend_watch.node) {
|
||||
unregister_xenbus_watch(&dev->otherend_watch);
|
||||
kfree(dev->otherend_watch.node);
|
||||
dev->otherend_watch.node = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
int read_otherend_details(struct xenbus_device *xendev,
|
||||
char *id_node, char *path_node)
|
||||
{
|
||||
int err = xenbus_gather(XBT_NIL, xendev->nodename,
|
||||
id_node, "%i", &xendev->otherend_id,
|
||||
path_node, NULL, &xendev->otherend,
|
||||
NULL);
|
||||
if (err) {
|
||||
xenbus_dev_fatal(xendev, err,
|
||||
"reading other end details from %s",
|
||||
xendev->nodename);
|
||||
return err;
|
||||
}
|
||||
if (strlen(xendev->otherend) == 0 ||
|
||||
!xenbus_exists(XBT_NIL, xendev->otherend, "")) {
|
||||
xenbus_dev_fatal(xendev, -ENOENT,
|
||||
"unable to read other end from %s. "
|
||||
"missing or inaccessible.",
|
||||
xendev->nodename);
|
||||
free_otherend_details(xendev);
|
||||
return -ENOENT;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
static int read_backend_details(struct xenbus_device *xendev)
|
||||
{
|
||||
return read_otherend_details(xendev, "backend-id", "backend");
|
||||
}
|
||||
|
||||
|
||||
/* Bus type for frontend drivers. */
|
||||
static struct xen_bus_type xenbus_frontend = {
|
||||
.root = "device",
|
||||
.levels = 2, /* device/type/<id> */
|
||||
.get_bus_id = frontend_bus_id,
|
||||
.probe = xenbus_probe_frontend,
|
||||
.bus = {
|
||||
.name = "xen",
|
||||
.match = xenbus_match,
|
||||
.probe = xenbus_dev_probe,
|
||||
.remove = xenbus_dev_remove,
|
||||
.shutdown = xenbus_dev_shutdown,
|
||||
},
|
||||
};
|
||||
|
||||
static void otherend_changed(struct xenbus_watch *watch,
|
||||
const char **vec, unsigned int len)
|
||||
{
|
||||
struct xenbus_device *dev =
|
||||
container_of(watch, struct xenbus_device, otherend_watch);
|
||||
struct xenbus_driver *drv = to_xenbus_driver(dev->dev.driver);
|
||||
enum xenbus_state state;
|
||||
|
||||
/* Protect us against watches firing on old details when the otherend
|
||||
details change, say immediately after a resume. */
|
||||
if (!dev->otherend ||
|
||||
strncmp(dev->otherend, vec[XS_WATCH_PATH],
|
||||
strlen(dev->otherend))) {
|
||||
dev_dbg(&dev->dev, "Ignoring watch at %s", vec[XS_WATCH_PATH]);
|
||||
return;
|
||||
}
|
||||
|
||||
state = xenbus_read_driver_state(dev->otherend);
|
||||
|
||||
dev_dbg(&dev->dev, "state is %d, (%s), %s, %s",
|
||||
state, xenbus_strstate(state), dev->otherend_watch.node,
|
||||
vec[XS_WATCH_PATH]);
|
||||
|
||||
/*
|
||||
* Ignore xenbus transitions during shutdown. This prevents us doing
|
||||
* work that can fail e.g., when the rootfs is gone.
|
||||
*/
|
||||
if (system_state > SYSTEM_RUNNING) {
|
||||
struct xen_bus_type *bus = bus;
|
||||
bus = container_of(dev->dev.bus, struct xen_bus_type, bus);
|
||||
/* If we're frontend, drive the state machine to Closed. */
|
||||
/* This should cause the backend to release our resources. */
|
||||
if ((bus == &xenbus_frontend) && (state == XenbusStateClosing))
|
||||
xenbus_frontend_closed(dev);
|
||||
return;
|
||||
}
|
||||
|
||||
if (drv->otherend_changed)
|
||||
drv->otherend_changed(dev, state);
|
||||
}
|
||||
|
||||
|
||||
static int talk_to_otherend(struct xenbus_device *dev)
|
||||
{
|
||||
struct xenbus_driver *drv = to_xenbus_driver(dev->dev.driver);
|
||||
|
||||
free_otherend_watch(dev);
|
||||
free_otherend_details(dev);
|
||||
|
||||
return drv->read_otherend_details(dev);
|
||||
}
|
||||
|
||||
|
||||
static int watch_otherend(struct xenbus_device *dev)
|
||||
{
|
||||
return xenbus_watch_pathfmt(dev, &dev->otherend_watch, otherend_changed,
|
||||
"%s/%s", dev->otherend, "state");
|
||||
}
|
||||
|
||||
|
||||
int xenbus_dev_probe(struct device *_dev)
|
||||
{
|
||||
struct xenbus_device *dev = to_xenbus_device(_dev);
|
||||
struct xenbus_driver *drv = to_xenbus_driver(_dev->driver);
|
||||
const struct xenbus_device_id *id;
|
||||
int err;
|
||||
|
||||
DPRINTK("%s", dev->nodename);
|
||||
|
||||
if (!drv->probe) {
|
||||
err = -ENODEV;
|
||||
goto fail;
|
||||
}
|
||||
|
||||
id = match_device(drv->ids, dev);
|
||||
if (!id) {
|
||||
err = -ENODEV;
|
||||
goto fail;
|
||||
}
|
||||
|
||||
err = talk_to_otherend(dev);
|
||||
if (err) {
|
||||
dev_warn(&dev->dev, "talk_to_otherend on %s failed.\n",
|
||||
dev->nodename);
|
||||
return err;
|
||||
}
|
||||
|
||||
err = drv->probe(dev, id);
|
||||
if (err)
|
||||
goto fail;
|
||||
|
||||
err = watch_otherend(dev);
|
||||
if (err) {
|
||||
dev_warn(&dev->dev, "watch_otherend on %s failed.\n",
|
||||
dev->nodename);
|
||||
return err;
|
||||
}
|
||||
|
||||
return 0;
|
||||
fail:
|
||||
xenbus_dev_error(dev, err, "xenbus_dev_probe on %s", dev->nodename);
|
||||
xenbus_switch_state(dev, XenbusStateClosed);
|
||||
return -ENODEV;
|
||||
}
|
||||
|
||||
int xenbus_dev_remove(struct device *_dev)
|
||||
{
|
||||
struct xenbus_device *dev = to_xenbus_device(_dev);
|
||||
struct xenbus_driver *drv = to_xenbus_driver(_dev->driver);
|
||||
|
||||
DPRINTK("%s", dev->nodename);
|
||||
|
||||
free_otherend_watch(dev);
|
||||
free_otherend_details(dev);
|
||||
|
||||
if (drv->remove)
|
||||
drv->remove(dev);
|
||||
|
||||
xenbus_switch_state(dev, XenbusStateClosed);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void xenbus_dev_shutdown(struct device *_dev)
|
||||
{
|
||||
struct xenbus_device *dev = to_xenbus_device(_dev);
|
||||
unsigned long timeout = 5*HZ;
|
||||
|
||||
DPRINTK("%s", dev->nodename);
|
||||
|
||||
get_device(&dev->dev);
|
||||
if (dev->state != XenbusStateConnected) {
|
||||
printk(KERN_INFO "%s: %s: %s != Connected, skipping\n", __func__,
|
||||
dev->nodename, xenbus_strstate(dev->state));
|
||||
goto out;
|
||||
}
|
||||
xenbus_switch_state(dev, XenbusStateClosing);
|
||||
timeout = wait_for_completion_timeout(&dev->down, timeout);
|
||||
if (!timeout)
|
||||
printk(KERN_INFO "%s: %s timeout closing device\n",
|
||||
__func__, dev->nodename);
|
||||
out:
|
||||
put_device(&dev->dev);
|
||||
}
|
||||
|
||||
int xenbus_register_driver_common(struct xenbus_driver *drv,
|
||||
struct xen_bus_type *bus,
|
||||
struct module *owner,
|
||||
const char *mod_name)
|
||||
{
|
||||
drv->driver.name = drv->name;
|
||||
drv->driver.bus = &bus->bus;
|
||||
drv->driver.owner = owner;
|
||||
drv->driver.mod_name = mod_name;
|
||||
|
||||
return driver_register(&drv->driver);
|
||||
}
|
||||
|
||||
int __xenbus_register_frontend(struct xenbus_driver *drv,
|
||||
struct module *owner, const char *mod_name)
|
||||
{
|
||||
int ret;
|
||||
|
||||
drv->read_otherend_details = read_backend_details;
|
||||
|
||||
ret = xenbus_register_driver_common(drv, &xenbus_frontend,
|
||||
owner, mod_name);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
/* If this driver is loaded as a module wait for devices to attach. */
|
||||
wait_for_devices(drv);
|
||||
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(__xenbus_register_frontend);
|
||||
|
||||
void xenbus_unregister_driver(struct xenbus_driver *drv)
|
||||
{
|
||||
driver_unregister(&drv->driver);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(xenbus_unregister_driver);
|
||||
|
||||
struct xb_find_info
|
||||
{
|
||||
struct xenbus_device *dev;
|
||||
const char *nodename;
|
||||
};
|
||||
|
||||
static int cmp_dev(struct device *dev, void *data)
|
||||
{
|
||||
struct xenbus_device *xendev = to_xenbus_device(dev);
|
||||
struct xb_find_info *info = data;
|
||||
|
||||
if (!strcmp(xendev->nodename, info->nodename)) {
|
||||
info->dev = xendev;
|
||||
get_device(dev);
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
struct xenbus_device *xenbus_device_find(const char *nodename,
|
||||
struct bus_type *bus)
|
||||
{
|
||||
struct xb_find_info info = { .dev = NULL, .nodename = nodename };
|
||||
|
||||
bus_for_each_dev(bus, NULL, &info, cmp_dev);
|
||||
return info.dev;
|
||||
}
|
||||
|
||||
static int cleanup_dev(struct device *dev, void *data)
|
||||
{
|
||||
struct xenbus_device *xendev = to_xenbus_device(dev);
|
||||
struct xb_find_info *info = data;
|
||||
int len = strlen(info->nodename);
|
||||
|
||||
DPRINTK("%s", info->nodename);
|
||||
|
||||
/* Match the info->nodename path, or any subdirectory of that path. */
|
||||
if (strncmp(xendev->nodename, info->nodename, len))
|
||||
return 0;
|
||||
|
||||
/* If the node name is longer, ensure it really is a subdirectory. */
|
||||
if ((strlen(xendev->nodename) > len) && (xendev->nodename[len] != '/'))
|
||||
return 0;
|
||||
|
||||
info->dev = xendev;
|
||||
get_device(dev);
|
||||
return 1;
|
||||
}
|
||||
|
||||
static void xenbus_cleanup_devices(const char *path, struct bus_type *bus)
|
||||
{
|
||||
struct xb_find_info info = { .nodename = path };
|
||||
|
||||
do {
|
||||
info.dev = NULL;
|
||||
bus_for_each_dev(bus, NULL, &info, cleanup_dev);
|
||||
if (info.dev) {
|
||||
device_unregister(&info.dev->dev);
|
||||
put_device(&info.dev->dev);
|
||||
}
|
||||
} while (info.dev);
|
||||
}
|
||||
|
||||
static void xenbus_dev_release(struct device *dev)
|
||||
{
|
||||
if (dev)
|
||||
kfree(to_xenbus_device(dev));
|
||||
}
|
||||
|
||||
static ssize_t xendev_show_nodename(struct device *dev,
|
||||
struct device_attribute *attr, char *buf)
|
||||
{
|
||||
return sprintf(buf, "%s\n", to_xenbus_device(dev)->nodename);
|
||||
}
|
||||
DEVICE_ATTR(nodename, S_IRUSR | S_IRGRP | S_IROTH, xendev_show_nodename, NULL);
|
||||
|
||||
static ssize_t xendev_show_devtype(struct device *dev,
|
||||
struct device_attribute *attr, char *buf)
|
||||
{
|
||||
return sprintf(buf, "%s\n", to_xenbus_device(dev)->devicetype);
|
||||
}
|
||||
DEVICE_ATTR(devtype, S_IRUSR | S_IRGRP | S_IROTH, xendev_show_devtype, NULL);
|
||||
|
||||
|
||||
int xenbus_probe_node(struct xen_bus_type *bus,
|
||||
const char *type,
|
||||
const char *nodename)
|
||||
{
|
||||
int err;
|
||||
struct xenbus_device *xendev;
|
||||
size_t stringlen;
|
||||
char *tmpstring;
|
||||
|
||||
enum xenbus_state state = xenbus_read_driver_state(nodename);
|
||||
|
||||
if (state != XenbusStateInitialising) {
|
||||
/* Device is not new, so ignore it. This can happen if a
|
||||
device is going away after switching to Closed. */
|
||||
return 0;
|
||||
}
|
||||
|
||||
stringlen = strlen(nodename) + 1 + strlen(type) + 1;
|
||||
xendev = kzalloc(sizeof(*xendev) + stringlen, GFP_KERNEL);
|
||||
if (!xendev)
|
||||
return -ENOMEM;
|
||||
|
||||
xendev->state = XenbusStateInitialising;
|
||||
|
||||
/* Copy the strings into the extra space. */
|
||||
|
||||
tmpstring = (char *)(xendev + 1);
|
||||
strcpy(tmpstring, nodename);
|
||||
xendev->nodename = tmpstring;
|
||||
|
||||
tmpstring += strlen(tmpstring) + 1;
|
||||
strcpy(tmpstring, type);
|
||||
xendev->devicetype = tmpstring;
|
||||
init_completion(&xendev->down);
|
||||
|
||||
xendev->dev.bus = &bus->bus;
|
||||
xendev->dev.release = xenbus_dev_release;
|
||||
|
||||
err = bus->get_bus_id(xendev->dev.bus_id, xendev->nodename);
|
||||
if (err)
|
||||
goto fail;
|
||||
|
||||
/* Register with generic device framework. */
|
||||
err = device_register(&xendev->dev);
|
||||
if (err)
|
||||
goto fail;
|
||||
|
||||
err = device_create_file(&xendev->dev, &dev_attr_nodename);
|
||||
if (err)
|
||||
goto fail_unregister;
|
||||
|
||||
err = device_create_file(&xendev->dev, &dev_attr_devtype);
|
||||
if (err)
|
||||
goto fail_remove_file;
|
||||
|
||||
return 0;
|
||||
fail_remove_file:
|
||||
device_remove_file(&xendev->dev, &dev_attr_nodename);
|
||||
fail_unregister:
|
||||
device_unregister(&xendev->dev);
|
||||
fail:
|
||||
kfree(xendev);
|
||||
return err;
|
||||
}
|
||||
|
||||
/* device/<typename>/<name> */
|
||||
static int xenbus_probe_frontend(const char *type, const char *name)
|
||||
{
|
||||
char *nodename;
|
||||
int err;
|
||||
|
||||
nodename = kasprintf(GFP_KERNEL, "%s/%s/%s",
|
||||
xenbus_frontend.root, type, name);
|
||||
if (!nodename)
|
||||
return -ENOMEM;
|
||||
|
||||
DPRINTK("%s", nodename);
|
||||
|
||||
err = xenbus_probe_node(&xenbus_frontend, type, nodename);
|
||||
kfree(nodename);
|
||||
return err;
|
||||
}
|
||||
|
||||
static int xenbus_probe_device_type(struct xen_bus_type *bus, const char *type)
|
||||
{
|
||||
int err = 0;
|
||||
char **dir;
|
||||
unsigned int dir_n = 0;
|
||||
int i;
|
||||
|
||||
dir = xenbus_directory(XBT_NIL, bus->root, type, &dir_n);
|
||||
if (IS_ERR(dir))
|
||||
return PTR_ERR(dir);
|
||||
|
||||
for (i = 0; i < dir_n; i++) {
|
||||
err = bus->probe(type, dir[i]);
|
||||
if (err)
|
||||
break;
|
||||
}
|
||||
kfree(dir);
|
||||
return err;
|
||||
}
|
||||
|
||||
int xenbus_probe_devices(struct xen_bus_type *bus)
|
||||
{
|
||||
int err = 0;
|
||||
char **dir;
|
||||
unsigned int i, dir_n;
|
||||
|
||||
dir = xenbus_directory(XBT_NIL, bus->root, "", &dir_n);
|
||||
if (IS_ERR(dir))
|
||||
return PTR_ERR(dir);
|
||||
|
||||
for (i = 0; i < dir_n; i++) {
|
||||
err = xenbus_probe_device_type(bus, dir[i]);
|
||||
if (err)
|
||||
break;
|
||||
}
|
||||
kfree(dir);
|
||||
return err;
|
||||
}
|
||||
|
||||
static unsigned int char_count(const char *str, char c)
|
||||
{
|
||||
unsigned int i, ret = 0;
|
||||
|
||||
for (i = 0; str[i]; i++)
|
||||
if (str[i] == c)
|
||||
ret++;
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int strsep_len(const char *str, char c, unsigned int len)
|
||||
{
|
||||
unsigned int i;
|
||||
|
||||
for (i = 0; str[i]; i++)
|
||||
if (str[i] == c) {
|
||||
if (len == 0)
|
||||
return i;
|
||||
len--;
|
||||
}
|
||||
return (len == 0) ? i : -ERANGE;
|
||||
}
|
||||
|
||||
void xenbus_dev_changed(const char *node, struct xen_bus_type *bus)
|
||||
{
|
||||
int exists, rootlen;
|
||||
struct xenbus_device *dev;
|
||||
char type[BUS_ID_SIZE];
|
||||
const char *p, *root;
|
||||
|
||||
if (char_count(node, '/') < 2)
|
||||
return;
|
||||
|
||||
exists = xenbus_exists(XBT_NIL, node, "");
|
||||
if (!exists) {
|
||||
xenbus_cleanup_devices(node, &bus->bus);
|
||||
return;
|
||||
}
|
||||
|
||||
/* backend/<type>/... or device/<type>/... */
|
||||
p = strchr(node, '/') + 1;
|
||||
snprintf(type, BUS_ID_SIZE, "%.*s", (int)strcspn(p, "/"), p);
|
||||
type[BUS_ID_SIZE-1] = '\0';
|
||||
|
||||
rootlen = strsep_len(node, '/', bus->levels);
|
||||
if (rootlen < 0)
|
||||
return;
|
||||
root = kasprintf(GFP_KERNEL, "%.*s", rootlen, node);
|
||||
if (!root)
|
||||
return;
|
||||
|
||||
dev = xenbus_device_find(root, &bus->bus);
|
||||
if (!dev)
|
||||
xenbus_probe_node(bus, type, root);
|
||||
else
|
||||
put_device(&dev->dev);
|
||||
|
||||
kfree(root);
|
||||
}
|
||||
|
||||
static void frontend_changed(struct xenbus_watch *watch,
|
||||
const char **vec, unsigned int len)
|
||||
{
|
||||
DPRINTK("");
|
||||
|
||||
xenbus_dev_changed(vec[XS_WATCH_PATH], &xenbus_frontend);
|
||||
}
|
||||
|
||||
/* We watch for devices appearing and vanishing. */
|
||||
static struct xenbus_watch fe_watch = {
|
||||
.node = "device",
|
||||
.callback = frontend_changed,
|
||||
};
|
||||
|
||||
static int suspend_dev(struct device *dev, void *data)
|
||||
{
|
||||
int err = 0;
|
||||
struct xenbus_driver *drv;
|
||||
struct xenbus_device *xdev;
|
||||
|
||||
DPRINTK("");
|
||||
|
||||
if (dev->driver == NULL)
|
||||
return 0;
|
||||
drv = to_xenbus_driver(dev->driver);
|
||||
xdev = container_of(dev, struct xenbus_device, dev);
|
||||
if (drv->suspend)
|
||||
err = drv->suspend(xdev);
|
||||
if (err)
|
||||
printk(KERN_WARNING
|
||||
"xenbus: suspend %s failed: %i\n", dev->bus_id, err);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int suspend_cancel_dev(struct device *dev, void *data)
|
||||
{
|
||||
int err = 0;
|
||||
struct xenbus_driver *drv;
|
||||
struct xenbus_device *xdev;
|
||||
|
||||
DPRINTK("");
|
||||
|
||||
if (dev->driver == NULL)
|
||||
return 0;
|
||||
drv = to_xenbus_driver(dev->driver);
|
||||
xdev = container_of(dev, struct xenbus_device, dev);
|
||||
if (drv->suspend_cancel)
|
||||
err = drv->suspend_cancel(xdev);
|
||||
if (err)
|
||||
printk(KERN_WARNING
|
||||
"xenbus: suspend_cancel %s failed: %i\n",
|
||||
dev->bus_id, err);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int resume_dev(struct device *dev, void *data)
|
||||
{
|
||||
int err;
|
||||
struct xenbus_driver *drv;
|
||||
struct xenbus_device *xdev;
|
||||
|
||||
DPRINTK("");
|
||||
|
||||
if (dev->driver == NULL)
|
||||
return 0;
|
||||
|
||||
drv = to_xenbus_driver(dev->driver);
|
||||
xdev = container_of(dev, struct xenbus_device, dev);
|
||||
|
||||
err = talk_to_otherend(xdev);
|
||||
if (err) {
|
||||
printk(KERN_WARNING
|
||||
"xenbus: resume (talk_to_otherend) %s failed: %i\n",
|
||||
dev->bus_id, err);
|
||||
return err;
|
||||
}
|
||||
|
||||
xdev->state = XenbusStateInitialising;
|
||||
|
||||
if (drv->resume) {
|
||||
err = drv->resume(xdev);
|
||||
if (err) {
|
||||
printk(KERN_WARNING
|
||||
"xenbus: resume %s failed: %i\n",
|
||||
dev->bus_id, err);
|
||||
return err;
|
||||
}
|
||||
}
|
||||
|
||||
err = watch_otherend(xdev);
|
||||
if (err) {
|
||||
printk(KERN_WARNING
|
||||
"xenbus_probe: resume (watch_otherend) %s failed: "
|
||||
"%d.\n", dev->bus_id, err);
|
||||
return err;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void xenbus_suspend(void)
|
||||
{
|
||||
DPRINTK("");
|
||||
|
||||
bus_for_each_dev(&xenbus_frontend.bus, NULL, NULL, suspend_dev);
|
||||
xenbus_backend_suspend(suspend_dev);
|
||||
xs_suspend();
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(xenbus_suspend);
|
||||
|
||||
void xenbus_resume(void)
|
||||
{
|
||||
xb_init_comms();
|
||||
xs_resume();
|
||||
bus_for_each_dev(&xenbus_frontend.bus, NULL, NULL, resume_dev);
|
||||
xenbus_backend_resume(resume_dev);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(xenbus_resume);
|
||||
|
||||
void xenbus_suspend_cancel(void)
|
||||
{
|
||||
xs_suspend_cancel();
|
||||
bus_for_each_dev(&xenbus_frontend.bus, NULL, NULL, suspend_cancel_dev);
|
||||
xenbus_backend_resume(suspend_cancel_dev);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(xenbus_suspend_cancel);
|
||||
|
||||
/* A flag to determine if xenstored is 'ready' (i.e. has started) */
|
||||
int xenstored_ready = 0;
|
||||
|
||||
|
||||
int register_xenstore_notifier(struct notifier_block *nb)
|
||||
{
|
||||
int ret = 0;
|
||||
|
||||
if (xenstored_ready > 0)
|
||||
ret = nb->notifier_call(nb, 0, NULL);
|
||||
else
|
||||
blocking_notifier_chain_register(&xenstore_chain, nb);
|
||||
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(register_xenstore_notifier);
|
||||
|
||||
void unregister_xenstore_notifier(struct notifier_block *nb)
|
||||
{
|
||||
blocking_notifier_chain_unregister(&xenstore_chain, nb);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(unregister_xenstore_notifier);
|
||||
|
||||
void xenbus_probe(struct work_struct *unused)
|
||||
{
|
||||
BUG_ON((xenstored_ready <= 0));
|
||||
|
||||
/* Enumerate devices in xenstore and watch for changes. */
|
||||
xenbus_probe_devices(&xenbus_frontend);
|
||||
register_xenbus_watch(&fe_watch);
|
||||
xenbus_backend_probe_and_watch();
|
||||
|
||||
/* Notify others that xenstore is up */
|
||||
blocking_notifier_call_chain(&xenstore_chain, 0, NULL);
|
||||
}
|
||||
|
||||
static int __init xenbus_probe_init(void)
|
||||
{
|
||||
int err = 0;
|
||||
|
||||
DPRINTK("");
|
||||
|
||||
err = -ENODEV;
|
||||
if (!is_running_on_xen())
|
||||
goto out_error;
|
||||
|
||||
/* Register ourselves with the kernel bus subsystem */
|
||||
err = bus_register(&xenbus_frontend.bus);
|
||||
if (err)
|
||||
goto out_error;
|
||||
|
||||
err = xenbus_backend_bus_register();
|
||||
if (err)
|
||||
goto out_unreg_front;
|
||||
|
||||
/*
|
||||
* Domain0 doesn't have a store_evtchn or store_mfn yet.
|
||||
*/
|
||||
if (is_initial_xendomain()) {
|
||||
/* dom0 not yet supported */
|
||||
} else {
|
||||
xenstored_ready = 1;
|
||||
xen_store_evtchn = xen_start_info->store_evtchn;
|
||||
xen_store_mfn = xen_start_info->store_mfn;
|
||||
}
|
||||
xen_store_interface = mfn_to_virt(xen_store_mfn);
|
||||
|
||||
/* Initialize the interface to xenstore. */
|
||||
err = xs_init();
|
||||
if (err) {
|
||||
printk(KERN_WARNING
|
||||
"XENBUS: Error initializing xenstore comms: %i\n", err);
|
||||
goto out_unreg_back;
|
||||
}
|
||||
|
||||
if (!is_initial_xendomain())
|
||||
xenbus_probe(NULL);
|
||||
|
||||
return 0;
|
||||
|
||||
out_unreg_back:
|
||||
xenbus_backend_bus_unregister();
|
||||
|
||||
out_unreg_front:
|
||||
bus_unregister(&xenbus_frontend.bus);
|
||||
|
||||
out_error:
|
||||
return err;
|
||||
}
|
||||
|
||||
postcore_initcall(xenbus_probe_init);
|
||||
|
||||
MODULE_LICENSE("GPL");
|
||||
|
||||
static int is_disconnected_device(struct device *dev, void *data)
|
||||
{
|
||||
struct xenbus_device *xendev = to_xenbus_device(dev);
|
||||
struct device_driver *drv = data;
|
||||
|
||||
/*
|
||||
* A device with no driver will never connect. We care only about
|
||||
* devices which should currently be in the process of connecting.
|
||||
*/
|
||||
if (!dev->driver)
|
||||
return 0;
|
||||
|
||||
/* Is this search limited to a particular driver? */
|
||||
if (drv && (dev->driver != drv))
|
||||
return 0;
|
||||
|
||||
return (xendev->state != XenbusStateConnected);
|
||||
}
|
||||
|
||||
static int exists_disconnected_device(struct device_driver *drv)
|
||||
{
|
||||
return bus_for_each_dev(&xenbus_frontend.bus, NULL, drv,
|
||||
is_disconnected_device);
|
||||
}
|
||||
|
||||
static int print_device_status(struct device *dev, void *data)
|
||||
{
|
||||
struct xenbus_device *xendev = to_xenbus_device(dev);
|
||||
struct device_driver *drv = data;
|
||||
|
||||
/* Is this operation limited to a particular driver? */
|
||||
if (drv && (dev->driver != drv))
|
||||
return 0;
|
||||
|
||||
if (!dev->driver) {
|
||||
/* Information only: is this too noisy? */
|
||||
printk(KERN_INFO "XENBUS: Device with no driver: %s\n",
|
||||
xendev->nodename);
|
||||
} else if (xendev->state != XenbusStateConnected) {
|
||||
printk(KERN_WARNING "XENBUS: Timeout connecting "
|
||||
"to device: %s (state %d)\n",
|
||||
xendev->nodename, xendev->state);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* We only wait for device setup after most initcalls have run. */
|
||||
static int ready_to_wait_for_devices;
|
||||
|
||||
/*
|
||||
* Wait up to 10 seconds for all devices currently configured. We need to do
* this to guarantee that the filesystems and/or network devices needed for
* boot are available before we allow the boot to proceed.
|
||||
*
|
||||
* This needs to be on a late_initcall, to happen after the frontend device
|
||||
* drivers have been initialised, but before the root fs is mounted.
|
||||
*
|
||||
* A possible improvement here would be to have the tools add a per-device
|
||||
* flag to the store entry, indicating whether it is needed at boot time.
|
||||
* This would allow people who knew what they were doing to accelerate their
|
||||
* boot slightly, but of course needs tools or manual intervention to set up
|
||||
* those flags correctly.
|
||||
*/
|
||||
static void wait_for_devices(struct xenbus_driver *xendrv)
|
||||
{
|
||||
unsigned long timeout = jiffies + 10*HZ;
|
||||
struct device_driver *drv = xendrv ? &xendrv->driver : NULL;
|
||||
|
||||
if (!ready_to_wait_for_devices || !is_running_on_xen())
|
||||
return;
|
||||
|
||||
while (exists_disconnected_device(drv)) {
|
||||
if (time_after(jiffies, timeout))
|
||||
break;
|
||||
schedule_timeout_interruptible(HZ/10);
|
||||
}
|
||||
|
||||
bus_for_each_dev(&xenbus_frontend.bus, NULL, drv,
|
||||
print_device_status);
|
||||
}
|
||||
|
||||
#ifndef MODULE
|
||||
static int __init boot_wait_for_devices(void)
|
||||
{
|
||||
ready_to_wait_for_devices = 1;
|
||||
wait_for_devices(NULL);
|
||||
return 0;
|
||||
}
|
||||
|
||||
late_initcall(boot_wait_for_devices);
|
||||
#endif
@ -0,0 +1,74 @@
/******************************************************************************
|
||||
* xenbus_probe.h
|
||||
*
|
||||
* Talks to Xen Store to figure out what devices we have.
|
||||
*
|
||||
* Copyright (C) 2005 Rusty Russell, IBM Corporation
|
||||
* Copyright (C) 2005 XenSource Ltd.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License version 2
|
||||
* as published by the Free Software Foundation; or, when distributed
|
||||
* separately from the Linux kernel or incorporated into other
|
||||
* software packages, subject to the following license:
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this source file (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use, copy, modify,
|
||||
* merge, publish, distribute, sublicense, and/or sell copies of the Software,
|
||||
* and to permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef _XENBUS_PROBE_H
|
||||
#define _XENBUS_PROBE_H
|
||||
|
||||
#ifdef CONFIG_XEN_BACKEND
|
||||
extern void xenbus_backend_suspend(int (*fn)(struct device *, void *));
|
||||
extern void xenbus_backend_resume(int (*fn)(struct device *, void *));
|
||||
extern void xenbus_backend_probe_and_watch(void);
|
||||
extern int xenbus_backend_bus_register(void);
|
||||
extern void xenbus_backend_bus_unregister(void);
|
||||
#else
|
||||
static inline void xenbus_backend_suspend(int (*fn)(struct device *, void *)) {}
|
||||
static inline void xenbus_backend_resume(int (*fn)(struct device *, void *)) {}
|
||||
static inline void xenbus_backend_probe_and_watch(void) {}
|
||||
static inline int xenbus_backend_bus_register(void) { return 0; }
|
||||
static inline void xenbus_backend_bus_unregister(void) {}
|
||||
#endif
|
||||
|
||||
struct xen_bus_type
|
||||
{
|
||||
char *root;
|
||||
unsigned int levels;
|
||||
int (*get_bus_id)(char bus_id[BUS_ID_SIZE], const char *nodename);
|
||||
int (*probe)(const char *type, const char *dir);
|
||||
struct bus_type bus;
|
||||
};
|
||||
|
||||
extern int xenbus_match(struct device *_dev, struct device_driver *_drv);
|
||||
extern int xenbus_dev_probe(struct device *_dev);
|
||||
extern int xenbus_dev_remove(struct device *_dev);
|
||||
extern int xenbus_register_driver_common(struct xenbus_driver *drv,
|
||||
struct xen_bus_type *bus,
|
||||
struct module *owner,
|
||||
const char *mod_name);
|
||||
extern int xenbus_probe_node(struct xen_bus_type *bus,
|
||||
const char *type,
|
||||
const char *nodename);
|
||||
extern int xenbus_probe_devices(struct xen_bus_type *bus);
|
||||
|
||||
extern void xenbus_dev_changed(const char *node, struct xen_bus_type *bus);
|
||||
|
||||
#endif
@ -0,0 +1,861 @@
/******************************************************************************
|
||||
* xenbus_xs.c
|
||||
*
|
||||
* This is the kernel equivalent of the "xs" library. We don't need everything
|
||||
* and we use xenbus_comms for communication.
|
||||
*
|
||||
* Copyright (C) 2005 Rusty Russell, IBM Corporation
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License version 2
|
||||
* as published by the Free Software Foundation; or, when distributed
|
||||
* separately from the Linux kernel or incorporated into other
|
||||
* software packages, subject to the following license:
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this source file (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use, copy, modify,
|
||||
* merge, publish, distribute, sublicense, and/or sell copies of the Software,
|
||||
* and to permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <linux/unistd.h>
|
||||
#include <linux/errno.h>
|
||||
#include <linux/types.h>
|
||||
#include <linux/uio.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/string.h>
|
||||
#include <linux/err.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/fcntl.h>
|
||||
#include <linux/kthread.h>
|
||||
#include <linux/rwsem.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/mutex.h>
|
||||
#include <xen/xenbus.h>
|
||||
#include "xenbus_comms.h"
|
||||
|
||||
struct xs_stored_msg {
|
||||
struct list_head list;
|
||||
|
||||
struct xsd_sockmsg hdr;
|
||||
|
||||
union {
|
||||
/* Queued replies. */
|
||||
struct {
|
||||
char *body;
|
||||
} reply;
|
||||
|
||||
/* Queued watch events. */
|
||||
struct {
|
||||
struct xenbus_watch *handle;
|
||||
char **vec;
|
||||
unsigned int vec_size;
|
||||
} watch;
|
||||
} u;
|
||||
};
|
||||
|
||||
struct xs_handle {
|
||||
/* A list of replies. Currently only one will ever be outstanding. */
|
||||
struct list_head reply_list;
|
||||
spinlock_t reply_lock;
|
||||
wait_queue_head_t reply_waitq;
|
||||
|
||||
/*
|
||||
* Mutex ordering: transaction_mutex -> watch_mutex -> request_mutex.
|
||||
* response_mutex is never taken simultaneously with the other three.
|
||||
*/
|
||||
|
||||
/* One request at a time. */
|
||||
struct mutex request_mutex;
|
||||
|
||||
/* Protect xenbus reader thread against save/restore. */
|
||||
struct mutex response_mutex;
|
||||
|
||||
/* Protect transactions against save/restore. */
|
||||
struct rw_semaphore transaction_mutex;
|
||||
|
||||
/* Protect watch (de)register against save/restore. */
|
||||
struct rw_semaphore watch_mutex;
|
||||
};
|
||||
|
||||
static struct xs_handle xs_state;
|
||||
|
||||
/* List of registered watches, and a lock to protect it. */
|
||||
static LIST_HEAD(watches);
|
||||
static DEFINE_SPINLOCK(watches_lock);
|
||||
|
||||
/* List of pending watch callback events, and a lock to protect it. */
|
||||
static LIST_HEAD(watch_events);
|
||||
static DEFINE_SPINLOCK(watch_events_lock);
|
||||
|
||||
/*
|
||||
* Details of the xenwatch callback kernel thread. The thread waits on the
|
||||
* watch_events_waitq for work to do (queued on watch_events list). When it
|
||||
* wakes up it acquires the xenwatch_mutex before reading the list and
|
||||
* carrying out work.
|
||||
*/
|
||||
static pid_t xenwatch_pid;
|
||||
static DEFINE_MUTEX(xenwatch_mutex);
|
||||
static DECLARE_WAIT_QUEUE_HEAD(watch_events_waitq);
|
||||
|
||||
static int get_error(const char *errorstring)
|
||||
{
|
||||
unsigned int i;
|
||||
|
||||
for (i = 0; strcmp(errorstring, xsd_errors[i].errstring) != 0; i++) {
|
||||
if (i == ARRAY_SIZE(xsd_errors) - 1) {
|
||||
printk(KERN_WARNING
|
||||
"XENBUS xen store gave: unknown error %s",
|
||||
errorstring);
|
||||
return EINVAL;
|
||||
}
|
||||
}
|
||||
return xsd_errors[i].errnum;
|
||||
}
|
||||
|
||||
static void *read_reply(enum xsd_sockmsg_type *type, unsigned int *len)
|
||||
{
|
||||
struct xs_stored_msg *msg;
|
||||
char *body;
|
||||
|
||||
spin_lock(&xs_state.reply_lock);
|
||||
|
||||
while (list_empty(&xs_state.reply_list)) {
|
||||
spin_unlock(&xs_state.reply_lock);
|
||||
/* XXX FIXME: Avoid synchronous wait for response here. */
|
||||
wait_event(xs_state.reply_waitq,
|
||||
!list_empty(&xs_state.reply_list));
|
||||
spin_lock(&xs_state.reply_lock);
|
||||
}
|
||||
|
||||
msg = list_entry(xs_state.reply_list.next,
|
||||
struct xs_stored_msg, list);
|
||||
list_del(&msg->list);
|
||||
|
||||
spin_unlock(&xs_state.reply_lock);
|
||||
|
||||
*type = msg->hdr.type;
|
||||
if (len)
|
||||
*len = msg->hdr.len;
|
||||
body = msg->u.reply.body;
|
||||
|
||||
kfree(msg);
|
||||
|
||||
return body;
|
||||
}
|
||||
|
||||
void *xenbus_dev_request_and_reply(struct xsd_sockmsg *msg)
|
||||
{
|
||||
void *ret;
|
||||
struct xsd_sockmsg req_msg = *msg;
|
||||
int err;
|
||||
|
||||
if (req_msg.type == XS_TRANSACTION_START)
|
||||
down_read(&xs_state.transaction_mutex);
|
||||
|
||||
mutex_lock(&xs_state.request_mutex);
|
||||
|
||||
err = xb_write(msg, sizeof(*msg) + msg->len);
|
||||
if (err) {
|
||||
msg->type = XS_ERROR;
|
||||
ret = ERR_PTR(err);
|
||||
} else
|
||||
ret = read_reply(&msg->type, &msg->len);
|
||||
|
||||
mutex_unlock(&xs_state.request_mutex);
|
||||
|
||||
if ((msg->type == XS_TRANSACTION_END) ||
|
||||
((req_msg.type == XS_TRANSACTION_START) &&
|
||||
(msg->type == XS_ERROR)))
|
||||
up_read(&xs_state.transaction_mutex);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* Send message to xs, get kmalloc'ed reply. ERR_PTR() on error. */
|
||||
static void *xs_talkv(struct xenbus_transaction t,
|
||||
enum xsd_sockmsg_type type,
|
||||
const struct kvec *iovec,
|
||||
unsigned int num_vecs,
|
||||
unsigned int *len)
|
||||
{
|
||||
struct xsd_sockmsg msg;
|
||||
void *ret = NULL;
|
||||
unsigned int i;
|
||||
int err;
|
||||
|
||||
msg.tx_id = t.id;
|
||||
msg.req_id = 0;
|
||||
msg.type = type;
|
||||
msg.len = 0;
|
||||
for (i = 0; i < num_vecs; i++)
|
||||
msg.len += iovec[i].iov_len;
|
||||
|
||||
mutex_lock(&xs_state.request_mutex);
|
||||
|
||||
err = xb_write(&msg, sizeof(msg));
|
||||
if (err) {
|
||||
mutex_unlock(&xs_state.request_mutex);
|
||||
return ERR_PTR(err);
|
||||
}
|
||||
|
||||
for (i = 0; i < num_vecs; i++) {
|
||||
err = xb_write(iovec[i].iov_base, iovec[i].iov_len);
|
||||
if (err) {
|
||||
mutex_unlock(&xs_state.request_mutex);
|
||||
return ERR_PTR(err);
|
||||
}
|
||||
}
|
||||
|
||||
ret = read_reply(&msg.type, len);
|
||||
|
||||
mutex_unlock(&xs_state.request_mutex);
|
||||
|
||||
if (IS_ERR(ret))
|
||||
return ret;
|
||||
|
||||
if (msg.type == XS_ERROR) {
|
||||
err = get_error(ret);
|
||||
kfree(ret);
|
||||
return ERR_PTR(-err);
|
||||
}
|
||||
|
||||
if (msg.type != type) {
|
||||
if (printk_ratelimit())
|
||||
printk(KERN_WARNING
|
||||
"XENBUS unexpected type [%d], expected [%d]\n",
|
||||
msg.type, type);
|
||||
kfree(ret);
|
||||
return ERR_PTR(-EINVAL);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* Simplified version of xs_talkv: single message. */
|
||||
static void *xs_single(struct xenbus_transaction t,
|
||||
enum xsd_sockmsg_type type,
|
||||
const char *string,
|
||||
unsigned int *len)
|
||||
{
|
||||
struct kvec iovec;
|
||||
|
||||
iovec.iov_base = (void *)string;
|
||||
iovec.iov_len = strlen(string) + 1;
|
||||
return xs_talkv(t, type, &iovec, 1, len);
|
||||
}
|
||||
|
||||
/* Many commands only need an ack, don't care what it says. */
|
||||
static int xs_error(char *reply)
|
||||
{
|
||||
if (IS_ERR(reply))
|
||||
return PTR_ERR(reply);
|
||||
kfree(reply);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static unsigned int count_strings(const char *strings, unsigned int len)
|
||||
{
|
||||
unsigned int num;
|
||||
const char *p;
|
||||
|
||||
for (p = strings, num = 0; p < strings + len; p += strlen(p) + 1)
|
||||
num++;
|
||||
|
||||
return num;
|
||||
}
|
||||
|
||||
/* Return the path to dir with /name appended. Buffer must be kfree()'ed. */
|
||||
static char *join(const char *dir, const char *name)
|
||||
{
|
||||
char *buffer;
|
||||
|
||||
if (strlen(name) == 0)
|
||||
buffer = kasprintf(GFP_KERNEL, "%s", dir);
|
||||
else
|
||||
buffer = kasprintf(GFP_KERNEL, "%s/%s", dir, name);
|
||||
return (!buffer) ? ERR_PTR(-ENOMEM) : buffer;
|
||||
}
|
||||
|
||||
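/*
 * Turn a block of NUL-separated strings from xenstore into an array of
 * pointers. The pointer array and the string data share one allocation,
 * so the caller frees everything with a single kfree() of the result;
 * the incoming 'strings' buffer is consumed here.
 */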
static char **split(char *strings, unsigned int len, unsigned int *num)
|
||||
{
|
||||
char *p, **ret;
|
||||
|
||||
/* Count the strings. */
|
||||
*num = count_strings(strings, len);
|
||||
|
||||
/* Transfer to one big alloc for easy freeing. */
|
||||
ret = kmalloc(*num * sizeof(char *) + len, GFP_KERNEL);
|
||||
if (!ret) {
|
||||
kfree(strings);
|
||||
return ERR_PTR(-ENOMEM);
|
||||
}
|
||||
memcpy(&ret[*num], strings, len);
|
||||
kfree(strings);
|
||||
|
||||
strings = (char *)&ret[*num];
|
||||
for (p = strings, *num = 0; p < strings + len; p += strlen(p) + 1)
|
||||
ret[(*num)++] = p;
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
char **xenbus_directory(struct xenbus_transaction t,
|
||||
const char *dir, const char *node, unsigned int *num)
|
||||
{
|
||||
char *strings, *path;
|
||||
unsigned int len;
|
||||
|
||||
path = join(dir, node);
|
||||
if (IS_ERR(path))
|
||||
return (char **)path;
|
||||
|
||||
strings = xs_single(t, XS_DIRECTORY, path, &len);
|
||||
kfree(path);
|
||||
if (IS_ERR(strings))
|
||||
return (char **)strings;
|
||||
|
||||
return split(strings, len, num);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(xenbus_directory);
|
||||
|
||||
/* Check if a path exists. Return 1 if it does. */
|
||||
int xenbus_exists(struct xenbus_transaction t,
|
||||
const char *dir, const char *node)
|
||||
{
|
||||
char **d;
|
||||
int dir_n;
|
||||
|
||||
d = xenbus_directory(t, dir, node, &dir_n);
|
||||
if (IS_ERR(d))
|
||||
return 0;
|
||||
kfree(d);
|
||||
return 1;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(xenbus_exists);
|
||||
|
||||
/* Get the value of a single file.
|
||||
* Returns a kmalloc'ed value: call kfree() on it after use.
|
||||
* len indicates length in bytes.
|
||||
*/
|
||||
void *xenbus_read(struct xenbus_transaction t,
|
||||
const char *dir, const char *node, unsigned int *len)
|
||||
{
|
||||
char *path;
|
||||
void *ret;
|
||||
|
||||
path = join(dir, node);
|
||||
if (IS_ERR(path))
|
||||
return (void *)path;
|
||||
|
||||
ret = xs_single(t, XS_READ, path, len);
|
||||
kfree(path);
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(xenbus_read);
|
||||
|
||||
/* Write the value of a single file.
|
||||
* Returns -err on failure.
|
||||
*/
|
||||
int xenbus_write(struct xenbus_transaction t,
|
||||
const char *dir, const char *node, const char *string)
|
||||
{
|
||||
const char *path;
|
||||
struct kvec iovec[2];
|
||||
int ret;
|
||||
|
||||
path = join(dir, node);
|
||||
if (IS_ERR(path))
|
||||
return PTR_ERR(path);
|
||||
|
||||
iovec[0].iov_base = (void *)path;
|
||||
iovec[0].iov_len = strlen(path) + 1;
|
||||
iovec[1].iov_base = (void *)string;
|
||||
iovec[1].iov_len = strlen(string);
|
||||
|
||||
ret = xs_error(xs_talkv(t, XS_WRITE, iovec, ARRAY_SIZE(iovec), NULL));
|
||||
kfree(path);
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(xenbus_write);
|
||||
|
||||
/* Create a new directory. */
|
||||
int xenbus_mkdir(struct xenbus_transaction t,
|
||||
const char *dir, const char *node)
|
||||
{
|
||||
char *path;
|
||||
int ret;
|
||||
|
||||
path = join(dir, node);
|
||||
if (IS_ERR(path))
|
||||
return PTR_ERR(path);
|
||||
|
||||
ret = xs_error(xs_single(t, XS_MKDIR, path, NULL));
|
||||
kfree(path);
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(xenbus_mkdir);
|
||||
|
||||
/* Destroy a file or directory (directories must be empty). */
|
||||
int xenbus_rm(struct xenbus_transaction t, const char *dir, const char *node)
|
||||
{
|
||||
char *path;
|
||||
int ret;
|
||||
|
||||
path = join(dir, node);
|
||||
if (IS_ERR(path))
|
||||
return PTR_ERR(path);
|
||||
|
||||
ret = xs_error(xs_single(t, XS_RM, path, NULL));
|
||||
kfree(path);
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(xenbus_rm);
|
||||
|
||||
/* Start a transaction: changes by others will not be seen during this
|
||||
* transaction, and changes will not be visible to others until end.
|
||||
*/
|
||||
int xenbus_transaction_start(struct xenbus_transaction *t)
|
||||
{
|
||||
char *id_str;
|
||||
|
||||
down_read(&xs_state.transaction_mutex);
|
||||
|
||||
id_str = xs_single(XBT_NIL, XS_TRANSACTION_START, "", NULL);
|
||||
if (IS_ERR(id_str)) {
|
||||
up_read(&xs_state.transaction_mutex);
|
||||
return PTR_ERR(id_str);
|
||||
}
|
||||
|
||||
t->id = simple_strtoul(id_str, NULL, 0);
|
||||
kfree(id_str);
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(xenbus_transaction_start);
|
||||
|
||||
/* End a transaction.
|
||||
* If abort is true, the transaction is discarded instead of committed.
|
||||
*/
|
||||
int xenbus_transaction_end(struct xenbus_transaction t, int abort)
|
||||
{
|
||||
char abortstr[2];
|
||||
int err;
|
||||
|
||||
if (abort)
|
||||
strcpy(abortstr, "F");
|
||||
else
|
||||
strcpy(abortstr, "T");
|
||||
|
||||
err = xs_error(xs_single(t, XS_TRANSACTION_END, abortstr, NULL));
|
||||
|
||||
up_read(&xs_state.transaction_mutex);
|
||||
|
||||
return err;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(xenbus_transaction_end);
|
||||
|
||||
/* Single read and scanf: returns -errno or num scanned. */
|
||||
int xenbus_scanf(struct xenbus_transaction t,
|
||||
const char *dir, const char *node, const char *fmt, ...)
|
||||
{
|
||||
va_list ap;
|
||||
int ret;
|
||||
char *val;
|
||||
|
||||
val = xenbus_read(t, dir, node, NULL);
|
||||
if (IS_ERR(val))
|
||||
return PTR_ERR(val);
|
||||
|
||||
va_start(ap, fmt);
|
||||
ret = vsscanf(val, fmt, ap);
|
||||
va_end(ap);
|
||||
kfree(val);
|
||||
/* Distinctive errno. */
|
||||
if (ret == 0)
|
||||
return -ERANGE;
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(xenbus_scanf);
|
||||
|
||||
/* Single printf and write: returns -errno or 0. */
|
||||
int xenbus_printf(struct xenbus_transaction t,
|
||||
const char *dir, const char *node, const char *fmt, ...)
|
||||
{
|
||||
va_list ap;
|
||||
int ret;
|
||||
#define PRINTF_BUFFER_SIZE 4096
|
||||
char *printf_buffer;
|
||||
|
||||
printf_buffer = kmalloc(PRINTF_BUFFER_SIZE, GFP_KERNEL);
|
||||
if (printf_buffer == NULL)
|
||||
return -ENOMEM;
|
||||
|
||||
va_start(ap, fmt);
|
||||
ret = vsnprintf(printf_buffer, PRINTF_BUFFER_SIZE, fmt, ap);
|
||||
va_end(ap);
|
||||
|
||||
BUG_ON(ret > PRINTF_BUFFER_SIZE-1);
|
||||
ret = xenbus_write(t, dir, node, printf_buffer);
|
||||
|
||||
kfree(printf_buffer);
|
||||
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(xenbus_printf);
|
||||
|
||||
/* Takes tuples of names, scanf-style args, and void **, NULL terminated. */
|
||||
int xenbus_gather(struct xenbus_transaction t, const char *dir, ...)
|
||||
{
|
||||
va_list ap;
|
||||
const char *name;
|
||||
int ret = 0;
|
||||
|
||||
va_start(ap, dir);
|
||||
while (ret == 0 && (name = va_arg(ap, char *)) != NULL) {
|
||||
const char *fmt = va_arg(ap, char *);
|
||||
void *result = va_arg(ap, void *);
|
||||
char *p;
|
||||
|
||||
p = xenbus_read(t, dir, name, NULL);
|
||||
if (IS_ERR(p)) {
|
||||
ret = PTR_ERR(p);
|
||||
break;
|
||||
}
|
||||
if (fmt) {
|
||||
if (sscanf(p, fmt, result) == 0)
|
||||
ret = -EINVAL;
|
||||
kfree(p);
|
||||
} else
|
||||
*(char **)result = p;
|
||||
}
|
||||
va_end(ap);
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(xenbus_gather);
|
||||
|
||||
static int xs_watch(const char *path, const char *token)
|
||||
{
|
||||
struct kvec iov[2];
|
||||
|
||||
iov[0].iov_base = (void *)path;
|
||||
iov[0].iov_len = strlen(path) + 1;
|
||||
iov[1].iov_base = (void *)token;
|
||||
iov[1].iov_len = strlen(token) + 1;
|
||||
|
||||
return xs_error(xs_talkv(XBT_NIL, XS_WATCH, iov,
|
||||
ARRAY_SIZE(iov), NULL));
|
||||
}
|
||||
|
||||
static int xs_unwatch(const char *path, const char *token)
|
||||
{
|
||||
struct kvec iov[2];
|
||||
|
||||
iov[0].iov_base = (char *)path;
|
||||
iov[0].iov_len = strlen(path) + 1;
|
||||
iov[1].iov_base = (char *)token;
|
||||
iov[1].iov_len = strlen(token) + 1;
|
||||
|
||||
return xs_error(xs_talkv(XBT_NIL, XS_UNWATCH, iov,
|
||||
ARRAY_SIZE(iov), NULL));
|
||||
}
|
||||
|
||||
static struct xenbus_watch *find_watch(const char *token)
|
||||
{
|
||||
struct xenbus_watch *i, *cmp;
|
||||
|
||||
cmp = (void *)simple_strtoul(token, NULL, 16);
|
||||
|
||||
list_for_each_entry(i, &watches, list)
|
||||
if (i == cmp)
|
||||
return i;
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* Register callback to watch this node. */
|
||||
int register_xenbus_watch(struct xenbus_watch *watch)
|
||||
{
|
||||
/* Pointer in ascii is the token. */
|
||||
char token[sizeof(watch) * 2 + 1];
|
||||
int err;
|
||||
|
||||
sprintf(token, "%lX", (long)watch);
|
||||
|
||||
down_read(&xs_state.watch_mutex);
|
||||
|
||||
spin_lock(&watches_lock);
|
||||
BUG_ON(find_watch(token));
|
||||
list_add(&watch->list, &watches);
|
||||
spin_unlock(&watches_lock);
|
||||
|
||||
err = xs_watch(watch->node, token);
|
||||
|
||||
/* Ignore errors due to multiple registration. */
|
||||
if ((err != 0) && (err != -EEXIST)) {
|
||||
spin_lock(&watches_lock);
|
||||
list_del(&watch->list);
|
||||
spin_unlock(&watches_lock);
|
||||
}
|
||||
|
||||
up_read(&xs_state.watch_mutex);
|
||||
|
||||
return err;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(register_xenbus_watch);
|
||||
|
||||
void unregister_xenbus_watch(struct xenbus_watch *watch)
|
||||
{
|
||||
struct xs_stored_msg *msg, *tmp;
|
||||
char token[sizeof(watch) * 2 + 1];
|
||||
int err;
|
||||
|
||||
sprintf(token, "%lX", (long)watch);
|
||||
|
||||
down_read(&xs_state.watch_mutex);
|
||||
|
||||
spin_lock(&watches_lock);
|
||||
BUG_ON(!find_watch(token));
|
||||
list_del(&watch->list);
|
||||
spin_unlock(&watches_lock);
|
||||
|
||||
err = xs_unwatch(watch->node, token);
|
||||
if (err)
|
||||
printk(KERN_WARNING
|
||||
"XENBUS Failed to release watch %s: %i\n",
|
||||
watch->node, err);
|
||||
|
||||
up_read(&xs_state.watch_mutex);
|
||||
|
||||
/* Make sure there are no callbacks running currently (unless
it's us) */
|
||||
if (current->pid != xenwatch_pid)
|
||||
mutex_lock(&xenwatch_mutex);
|
||||
|
||||
/* Cancel pending watch events. */
|
||||
spin_lock(&watch_events_lock);
|
||||
list_for_each_entry_safe(msg, tmp, &watch_events, list) {
|
||||
if (msg->u.watch.handle != watch)
|
||||
continue;
|
||||
list_del(&msg->list);
|
||||
kfree(msg->u.watch.vec);
|
||||
kfree(msg);
|
||||
}
|
||||
spin_unlock(&watch_events_lock);
|
||||
|
||||
if (current->pid != xenwatch_pid)
|
||||
mutex_unlock(&xenwatch_mutex);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(unregister_xenbus_watch);
|
||||
|
||||
void xs_suspend(void)
|
||||
{
|
||||
down_write(&xs_state.transaction_mutex);
|
||||
down_write(&xs_state.watch_mutex);
|
||||
mutex_lock(&xs_state.request_mutex);
|
||||
mutex_lock(&xs_state.response_mutex);
|
||||
}
|
||||
|
||||
void xs_resume(void)
|
||||
{
|
||||
struct xenbus_watch *watch;
|
||||
char token[sizeof(watch) * 2 + 1];
|
||||
|
||||
mutex_unlock(&xs_state.response_mutex);
|
||||
mutex_unlock(&xs_state.request_mutex);
|
||||
up_write(&xs_state.transaction_mutex);
|
||||
|
||||
/* No need for watches_lock: the watch_mutex is sufficient. */
|
||||
list_for_each_entry(watch, &watches, list) {
|
||||
sprintf(token, "%lX", (long)watch);
|
||||
xs_watch(watch->node, token);
|
||||
}
|
||||
|
||||
up_write(&xs_state.watch_mutex);
|
||||
}
|
||||
|
||||
void xs_suspend_cancel(void)
|
||||
{
|
||||
mutex_unlock(&xs_state.response_mutex);
|
||||
mutex_unlock(&xs_state.request_mutex);
|
||||
up_write(&xs_state.watch_mutex);
|
||||
up_write(&xs_state.transaction_mutex);
|
||||
}
|
||||
|
||||
static int xenwatch_thread(void *unused)
|
||||
{
|
||||
struct list_head *ent;
|
||||
struct xs_stored_msg *msg;
|
||||
|
||||
for (;;) {
|
||||
wait_event_interruptible(watch_events_waitq,
|
||||
!list_empty(&watch_events));
|
||||
|
||||
if (kthread_should_stop())
|
||||
break;
|
||||
|
||||
mutex_lock(&xenwatch_mutex);
|
||||
|
||||
spin_lock(&watch_events_lock);
|
||||
ent = watch_events.next;
|
||||
if (ent != &watch_events)
|
||||
list_del(ent);
|
||||
spin_unlock(&watch_events_lock);
|
||||
|
||||
if (ent != &watch_events) {
|
||||
msg = list_entry(ent, struct xs_stored_msg, list);
|
||||
msg->u.watch.handle->callback(
|
||||
msg->u.watch.handle,
|
||||
(const char **)msg->u.watch.vec,
|
||||
msg->u.watch.vec_size);
|
||||
kfree(msg->u.watch.vec);
|
||||
kfree(msg);
|
||||
}
|
||||
|
||||
mutex_unlock(&xenwatch_mutex);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
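/*
 * Read a single message from xenstored and route it: watch events are
 * queued for xenwatch_thread(), anything else is treated as a reply and
 * placed on the reply_list for the waiting requester.
 */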
static int process_msg(void)
|
||||
{
|
||||
struct xs_stored_msg *msg;
|
||||
char *body;
|
||||
int err;
|
||||
|
||||
/*
|
||||
* We must disallow save/restore while reading a xenstore message.
|
||||
* A partial read across s/r leaves us out of sync with xenstored.
|
||||
*/
|
||||
for (;;) {
|
||||
err = xb_wait_for_data_to_read();
|
||||
if (err)
|
||||
return err;
|
||||
mutex_lock(&xs_state.response_mutex);
|
||||
if (xb_data_to_read())
|
||||
break;
|
||||
/* We raced with save/restore: pending data 'disappeared'. */
|
||||
mutex_unlock(&xs_state.response_mutex);
|
||||
}
|
||||
|
||||
|
||||
msg = kmalloc(sizeof(*msg), GFP_KERNEL);
|
||||
if (msg == NULL) {
|
||||
err = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
|
||||
err = xb_read(&msg->hdr, sizeof(msg->hdr));
|
||||
if (err) {
|
||||
kfree(msg);
|
||||
goto out;
|
||||
}
|
||||
|
||||
body = kmalloc(msg->hdr.len + 1, GFP_KERNEL);
|
||||
if (body == NULL) {
|
||||
kfree(msg);
|
||||
err = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
|
||||
err = xb_read(body, msg->hdr.len);
|
||||
if (err) {
|
||||
kfree(body);
|
||||
kfree(msg);
|
||||
goto out;
|
||||
}
|
||||
body[msg->hdr.len] = '\0';
|
||||
|
||||
if (msg->hdr.type == XS_WATCH_EVENT) {
|
||||
msg->u.watch.vec = split(body, msg->hdr.len,
|
||||
&msg->u.watch.vec_size);
|
||||
if (IS_ERR(msg->u.watch.vec)) {
|
||||
kfree(msg);
|
||||
err = PTR_ERR(msg->u.watch.vec);
|
||||
goto out;
|
||||
}
|
||||
|
||||
spin_lock(&watches_lock);
|
||||
msg->u.watch.handle = find_watch(
|
||||
msg->u.watch.vec[XS_WATCH_TOKEN]);
|
||||
if (msg->u.watch.handle != NULL) {
|
||||
spin_lock(&watch_events_lock);
|
||||
list_add_tail(&msg->list, &watch_events);
|
||||
wake_up(&watch_events_waitq);
|
||||
spin_unlock(&watch_events_lock);
|
||||
} else {
|
||||
kfree(msg->u.watch.vec);
|
||||
kfree(msg);
|
||||
}
|
||||
spin_unlock(&watches_lock);
|
||||
} else {
|
||||
msg->u.reply.body = body;
|
||||
spin_lock(&xs_state.reply_lock);
|
||||
list_add_tail(&msg->list, &xs_state.reply_list);
|
||||
spin_unlock(&xs_state.reply_lock);
|
||||
wake_up(&xs_state.reply_waitq);
|
||||
}
|
||||
|
||||
out:
|
||||
mutex_unlock(&xs_state.response_mutex);
|
||||
return err;
|
||||
}
|
||||
|
||||
static int xenbus_thread(void *unused)
|
||||
{
|
||||
int err;
|
||||
|
||||
for (;;) {
|
||||
err = process_msg();
|
||||
if (err)
|
||||
printk(KERN_WARNING "XENBUS error %d while reading "
|
||||
"message\n", err);
|
||||
if (kthread_should_stop())
|
||||
break;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int xs_init(void)
|
||||
{
|
||||
int err;
|
||||
struct task_struct *task;
|
||||
|
||||
INIT_LIST_HEAD(&xs_state.reply_list);
|
||||
spin_lock_init(&xs_state.reply_lock);
|
||||
init_waitqueue_head(&xs_state.reply_waitq);
|
||||
|
||||
mutex_init(&xs_state.request_mutex);
|
||||
mutex_init(&xs_state.response_mutex);
|
||||
init_rwsem(&xs_state.transaction_mutex);
|
||||
init_rwsem(&xs_state.watch_mutex);
|
||||
|
||||
/* Initialize the shared memory rings to talk to xenstored */
|
||||
err = xb_init_comms();
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
task = kthread_run(xenwatch_thread, NULL, "xenwatch");
|
||||
if (IS_ERR(task))
|
||||
return PTR_ERR(task);
|
||||
xenwatch_pid = task->pid;
|
||||
|
||||
task = kthread_run(xenbus_thread, NULL, "xenbus");
|
||||
if (IS_ERR(task))
|
||||
return PTR_ERR(task);
|
||||
|
||||
return 0;
|
||||
}
@ -209,7 +209,7 @@ void ocfs2_stop_heartbeat(struct ocfs2_super *osb)
envp[1] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin";
|
||||
envp[2] = NULL;
|
||||
|
||||
ret = call_usermodehelper(argv[0], argv, envp, 1);
|
||||
ret = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC);
|
||||
if (ret < 0)
|
||||
mlog_errno(ret);
|
||||
}
@ -41,6 +41,7 @@ extern int irqbalance_disable(char *str);
extern void fixup_irqs(cpumask_t map);
|
||||
#endif
|
||||
|
||||
unsigned int do_IRQ(struct pt_regs *regs);
|
||||
void init_IRQ(void);
|
||||
void __init native_init_IRQ(void);
|
||||
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
#ifndef _ASM_IRQ_VECTORS_LIMITS_H
|
||||
#define _ASM_IRQ_VECTORS_LIMITS_H
|
||||
|
||||
#ifdef CONFIG_X86_IO_APIC
|
||||
#if defined(CONFIG_X86_IO_APIC) || defined(CONFIG_PARAVIRT)
|
||||
#define NR_IRQS 224
|
||||
# if (224 >= 32 * NR_CPUS)
|
||||
# define NR_IRQ_VECTORS NR_IRQS
|
||||
|
|
|
@ -32,6 +32,8 @@ static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
#endif
|
||||
}
|
||||
|
||||
void leave_mm(unsigned long cpu);
|
||||
|
||||
static inline void switch_mm(struct mm_struct *prev,
|
||||
struct mm_struct *next,
|
||||
struct task_struct *tsk)
|
||||
|
|
|
@ -52,6 +52,8 @@ struct paravirt_ops
/* Basic arch-specific setup */
|
||||
void (*arch_setup)(void);
|
||||
char *(*memory_setup)(void);
|
||||
void (*post_allocator_init)(void);
|
||||
|
||||
void (*init_IRQ)(void);
|
||||
void (*time_init)(void);
|
||||
|
||||
|
@ -116,7 +118,7 @@ struct paravirt_ops
|
|||
|
||||
u64 (*read_tsc)(void);
|
||||
u64 (*read_pmc)(void);
|
||||
u64 (*get_scheduled_cycles)(void);
|
||||
unsigned long long (*sched_clock)(void);
|
||||
unsigned long (*get_cpu_khz)(void);
|
||||
|
||||
/* Segment descriptor handling */
|
||||
|
@ -173,7 +175,7 @@ struct paravirt_ops
|
|||
unsigned long va);
|
||||
|
||||
/* Hooks for allocating/releasing pagetable pages */
|
||||
void (*alloc_pt)(u32 pfn);
|
||||
void (*alloc_pt)(struct mm_struct *mm, u32 pfn);
|
||||
void (*alloc_pd)(u32 pfn);
|
||||
void (*alloc_pd_clone)(u32 pfn, u32 clonepfn, u32 start, u32 count);
|
||||
void (*release_pt)(u32 pfn);
|
||||
|
@ -260,6 +262,7 @@ unsigned paravirt_patch_default(u8 type, u16 clobbers, void *site, unsigned len)
|
|||
unsigned paravirt_patch_insns(void *site, unsigned len,
|
||||
const char *start, const char *end);
|
||||
|
||||
int paravirt_disable_iospace(void);
|
||||
|
||||
/*
|
||||
* This generates an indirect call based on the operation type number.
|
||||
|
@ -563,7 +566,10 @@ static inline u64 paravirt_read_tsc(void)
|
|||
|
||||
#define rdtscll(val) (val = paravirt_read_tsc())
|
||||
|
||||
#define get_scheduled_cycles(val) (val = paravirt_ops.get_scheduled_cycles())
|
||||
static inline unsigned long long paravirt_sched_clock(void)
|
||||
{
|
||||
return PVOP_CALL0(unsigned long long, sched_clock);
|
||||
}
|
||||
#define calculate_cpu_khz() (paravirt_ops.get_cpu_khz())
|
||||
|
||||
#define write_tsc(val1,val2) wrmsr(0x10, val1, val2)
|
||||
|
@ -669,6 +675,12 @@ static inline void setup_secondary_clock(void)
|
|||
}
|
||||
#endif
|
||||
|
||||
static inline void paravirt_post_allocator_init(void)
|
||||
{
|
||||
if (paravirt_ops.post_allocator_init)
|
||||
(*paravirt_ops.post_allocator_init)();
|
||||
}
|
||||
|
||||
static inline void paravirt_pagetable_setup_start(pgd_t *base)
|
||||
{
|
||||
if (paravirt_ops.pagetable_setup_start)
|
||||
|
@ -725,9 +737,9 @@ static inline void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm,
|
|||
PVOP_VCALL3(flush_tlb_others, &cpumask, mm, va);
|
||||
}
|
||||
|
||||
static inline void paravirt_alloc_pt(unsigned pfn)
|
||||
static inline void paravirt_alloc_pt(struct mm_struct *mm, unsigned pfn)
|
||||
{
|
||||
PVOP_VCALL1(alloc_pt, pfn);
|
||||
PVOP_VCALL2(alloc_pt, mm, pfn);
|
||||
}
|
||||
static inline void paravirt_release_pt(unsigned pfn)
|
||||
{
|
||||
|
|
|
@ -7,7 +7,7 @@
|
|||
#ifdef CONFIG_PARAVIRT
|
||||
#include <asm/paravirt.h>
|
||||
#else
|
||||
#define paravirt_alloc_pt(pfn) do { } while (0)
|
||||
#define paravirt_alloc_pt(mm, pfn) do { } while (0)
|
||||
#define paravirt_alloc_pd(pfn) do { } while (0)
|
||||
#define paravirt_alloc_pd(pfn) do { } while (0)
|
||||
#define paravirt_alloc_pd_clone(pfn, clonepfn, start, count) do { } while (0)
|
||||
|
@ -17,13 +17,13 @@
|
|||
|
||||
#define pmd_populate_kernel(mm, pmd, pte) \
|
||||
do { \
|
||||
paravirt_alloc_pt(__pa(pte) >> PAGE_SHIFT); \
|
||||
paravirt_alloc_pt(mm, __pa(pte) >> PAGE_SHIFT); \
|
||||
set_pmd(pmd, __pmd(_PAGE_TABLE + __pa(pte))); \
|
||||
} while (0)
|
||||
|
||||
#define pmd_populate(mm, pmd, pte) \
|
||||
do { \
|
||||
paravirt_alloc_pt(page_to_pfn(pte)); \
|
||||
paravirt_alloc_pt(mm, page_to_pfn(pte)); \
|
||||
set_pmd(pmd, __pmd(_PAGE_TABLE + \
|
||||
((unsigned long long)page_to_pfn(pte) << \
|
||||
(unsigned long long) PAGE_SHIFT))); \
|
||||
|
|
|
@ -81,6 +81,10 @@ void __init add_memory_region(unsigned long long start,
|
|||
|
||||
extern unsigned long init_pg_tables_end;
|
||||
|
||||
#ifndef CONFIG_PARAVIRT
|
||||
#define paravirt_post_allocator_init() do {} while (0)
|
||||
#endif
|
||||
|
||||
#endif /* __ASSEMBLY__ */
|
||||
|
||||
#endif /* __KERNEL__ */
|
||||
|
|
|
@ -43,9 +43,12 @@ extern u8 x86_cpu_to_apicid[];
|
|||
|
||||
#define cpu_physical_id(cpu) x86_cpu_to_apicid[cpu]
|
||||
|
||||
extern void set_cpu_sibling_map(int cpu);
|
||||
|
||||
#ifdef CONFIG_HOTPLUG_CPU
|
||||
extern void cpu_exit_clear(void);
|
||||
extern void cpu_uninit(void);
|
||||
extern void remove_siblinginfo(int cpu);
|
||||
#endif
|
||||
|
||||
struct smp_ops
|
||||
|
@ -129,6 +132,8 @@ extern int __cpu_disable(void);
|
|||
extern void __cpu_die(unsigned int cpu);
|
||||
extern unsigned int num_processors;
|
||||
|
||||
void __cpuinit smp_store_cpu_info(int id);
|
||||
|
||||
#endif /* !__ASSEMBLY__ */
|
||||
|
||||
#else /* CONFIG_SMP */
|
||||
|
|
|
@ -15,8 +15,38 @@ extern int no_sync_cmos_clock;
|
|||
extern int recalibrate_cpu_khz(void);
|
||||
|
||||
#ifndef CONFIG_PARAVIRT
|
||||
#define get_scheduled_cycles(val) rdtscll(val)
|
||||
#define calculate_cpu_khz() native_calculate_cpu_khz()
|
||||
#endif
|
||||
|
||||
/* Accelerators for sched_clock()
|
||||
* convert from cycles(64bits) => nanoseconds (64bits)
|
||||
* basic equation:
|
||||
* ns = cycles / (freq / ns_per_sec)
|
||||
* ns = cycles * (ns_per_sec / freq)
|
||||
* ns = cycles * (10^9 / (cpu_khz * 10^3))
|
||||
* ns = cycles * (10^6 / cpu_khz)
|
||||
*
|
||||
* Then we use scaling math (suggested by george@mvista.com) to get:
|
||||
* ns = cycles * (10^6 * SC / cpu_khz) / SC
|
||||
* ns = cycles * cyc2ns_scale / SC
|
||||
*
|
||||
* And since SC is a constant power of two, we can convert the div
|
||||
* into a shift.
|
||||
*
|
||||
* We can use a khz divisor instead of mhz to keep better precision, since
|
||||
* cyc2ns_scale is limited to 10^6 * 2^10, which fits in 32 bits.
|
||||
* (mathieu.desnoyers@polymtl.ca)
|
||||
*
|
||||
* -johnstul@us.ibm.com "math is hard, lets go shopping!"
|
||||
*/
|
||||
extern unsigned long cyc2ns_scale __read_mostly;
|
||||
|
||||
#define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */
|
||||
|
||||
static inline unsigned long long cycles_2_ns(unsigned long long cyc)
|
||||
{
|
||||
return (cyc * cyc2ns_scale) >> CYC2NS_SCALE_FACTOR;
|
||||
}
|
||||
|
||||
|
||||
#endif
|
||||
|
|
|
@ -49,7 +49,7 @@ extern struct vmi_timer_ops {
|
|||
extern void __init vmi_time_init(void);
|
||||
extern unsigned long vmi_get_wallclock(void);
|
||||
extern int vmi_set_wallclock(unsigned long now);
|
||||
extern unsigned long long vmi_get_sched_cycles(void);
|
||||
extern unsigned long long vmi_sched_clock(void);
|
||||
extern unsigned long vmi_cpu_khz(void);
|
||||
|
||||
#ifdef CONFIG_X86_LOCAL_APIC
|
||||
@ -0,0 +1,413 @@
/******************************************************************************
|
||||
* hypercall.h
|
||||
*
|
||||
* Linux-specific hypervisor handling.
|
||||
*
|
||||
* Copyright (c) 2002-2004, K A Fraser
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License version 2
|
||||
* as published by the Free Software Foundation; or, when distributed
|
||||
* separately from the Linux kernel or incorporated into other
|
||||
* software packages, subject to the following license:
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this source file (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use, copy, modify,
|
||||
* merge, publish, distribute, sublicense, and/or sell copies of the Software,
|
||||
* and to permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef __HYPERCALL_H__
|
||||
#define __HYPERCALL_H__
|
||||
|
||||
#include <linux/errno.h>
|
||||
#include <linux/string.h>
|
||||
|
||||
#include <xen/interface/xen.h>
|
||||
#include <xen/interface/sched.h>
|
||||
#include <xen/interface/physdev.h>
|
||||
|
||||
extern struct { char _entry[32]; } hypercall_page[];
|
||||
|
||||
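/*
 * The _hypercallN() wrappers below use the 32-bit Xen calling convention:
 * the hypercall number selects a 32-byte stub in hypercall_page, the
 * result comes back in %eax, and up to five arguments are passed in
 * %ebx, %ecx, %edx, %esi and %edi (which the hypervisor may clobber).
 */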
#define _hypercall0(type, name) \
|
||||
({ \
|
||||
long __res; \
|
||||
asm volatile ( \
|
||||
"call %[call]" \
|
||||
: "=a" (__res) \
|
||||
: [call] "m" (hypercall_page[__HYPERVISOR_##name]) \
|
||||
: "memory" ); \
|
||||
(type)__res; \
|
||||
})
|
||||
|
||||
#define _hypercall1(type, name, a1) \
|
||||
({ \
|
||||
long __res, __ign1; \
|
||||
asm volatile ( \
|
||||
"call %[call]" \
|
||||
: "=a" (__res), "=b" (__ign1) \
|
||||
: "1" ((long)(a1)), \
|
||||
[call] "m" (hypercall_page[__HYPERVISOR_##name]) \
|
||||
: "memory" ); \
|
||||
(type)__res; \
|
||||
})
|
||||
|
||||
#define _hypercall2(type, name, a1, a2) \
|
||||
({ \
|
||||
long __res, __ign1, __ign2; \
|
||||
asm volatile ( \
|
||||
"call %[call]" \
|
||||
: "=a" (__res), "=b" (__ign1), "=c" (__ign2) \
|
||||
: "1" ((long)(a1)), "2" ((long)(a2)), \
|
||||
[call] "m" (hypercall_page[__HYPERVISOR_##name]) \
|
||||
: "memory" ); \
|
||||
(type)__res; \
|
||||
})
|
||||
|
||||
#define _hypercall3(type, name, a1, a2, a3) \
|
||||
({ \
|
||||
long __res, __ign1, __ign2, __ign3; \
|
||||
asm volatile ( \
|
||||
"call %[call]" \
|
||||
: "=a" (__res), "=b" (__ign1), "=c" (__ign2), \
|
||||
"=d" (__ign3) \
|
||||
: "1" ((long)(a1)), "2" ((long)(a2)), \
|
||||
"3" ((long)(a3)), \
|
||||
[call] "m" (hypercall_page[__HYPERVISOR_##name]) \
|
||||
: "memory" ); \
|
||||
(type)__res; \
|
||||
})
|
||||
|
||||
#define _hypercall4(type, name, a1, a2, a3, a4) \
|
||||
({ \
|
||||
long __res, __ign1, __ign2, __ign3, __ign4; \
|
||||
asm volatile ( \
|
||||
"call %[call]" \
|
||||
: "=a" (__res), "=b" (__ign1), "=c" (__ign2), \
|
||||
"=d" (__ign3), "=S" (__ign4) \
|
||||
: "1" ((long)(a1)), "2" ((long)(a2)), \
|
||||
"3" ((long)(a3)), "4" ((long)(a4)), \
|
||||
[call] "m" (hypercall_page[__HYPERVISOR_##name]) \
|
||||
: "memory" ); \
|
||||
(type)__res; \
|
||||
})
|
||||
|
||||
#define _hypercall5(type, name, a1, a2, a3, a4, a5) \
|
||||
({ \
|
||||
long __res, __ign1, __ign2, __ign3, __ign4, __ign5; \
|
||||
asm volatile ( \
|
||||
"call %[call]" \
|
||||
: "=a" (__res), "=b" (__ign1), "=c" (__ign2), \
|
||||
"=d" (__ign3), "=S" (__ign4), "=D" (__ign5) \
|
||||
: "1" ((long)(a1)), "2" ((long)(a2)), \
|
||||
"3" ((long)(a3)), "4" ((long)(a4)), \
|
||||
"5" ((long)(a5)), \
|
||||
[call] "m" (hypercall_page[__HYPERVISOR_##name]) \
|
||||
: "memory" ); \
|
||||
(type)__res; \
|
||||
})
|
||||
|
||||
static inline int
|
||||
HYPERVISOR_set_trap_table(struct trap_info *table)
|
||||
{
|
||||
return _hypercall1(int, set_trap_table, table);
|
||||
}
|
||||
|
||||
static inline int
|
||||
HYPERVISOR_mmu_update(struct mmu_update *req, int count,
|
||||
int *success_count, domid_t domid)
|
||||
{
|
||||
return _hypercall4(int, mmu_update, req, count, success_count, domid);
|
||||
}
|
||||
|
||||
static inline int
|
||||
HYPERVISOR_mmuext_op(struct mmuext_op *op, int count,
|
||||
int *success_count, domid_t domid)
|
||||
{
|
||||
return _hypercall4(int, mmuext_op, op, count, success_count, domid);
|
||||
}
|
||||
|
||||
static inline int
|
||||
HYPERVISOR_set_gdt(unsigned long *frame_list, int entries)
|
||||
{
|
||||
return _hypercall2(int, set_gdt, frame_list, entries);
|
||||
}
|
||||
|
||||
static inline int
|
||||
HYPERVISOR_stack_switch(unsigned long ss, unsigned long esp)
|
||||
{
|
||||
return _hypercall2(int, stack_switch, ss, esp);
|
||||
}
|
||||
|
||||
static inline int
|
||||
HYPERVISOR_set_callbacks(unsigned long event_selector,
|
||||
unsigned long event_address,
|
||||
unsigned long failsafe_selector,
|
||||
unsigned long failsafe_address)
|
||||
{
|
||||
return _hypercall4(int, set_callbacks,
|
||||
event_selector, event_address,
|
||||
failsafe_selector, failsafe_address);
|
||||
}
|
||||
|
||||
static inline int
|
||||
HYPERVISOR_fpu_taskswitch(int set)
|
||||
{
|
||||
return _hypercall1(int, fpu_taskswitch, set);
|
||||
}
|
||||
|
||||
static inline int
|
||||
HYPERVISOR_sched_op(int cmd, unsigned long arg)
|
||||
{
|
||||
return _hypercall2(int, sched_op, cmd, arg);
|
||||
}
|
||||
|
||||
static inline long
|
||||
HYPERVISOR_set_timer_op(u64 timeout)
|
||||
{
|
||||
unsigned long timeout_hi = (unsigned long)(timeout>>32);
|
||||
unsigned long timeout_lo = (unsigned long)timeout;
|
||||
return _hypercall2(long, set_timer_op, timeout_lo, timeout_hi);
|
||||
}
|
||||
|
||||
static inline int
|
||||
HYPERVISOR_set_debugreg(int reg, unsigned long value)
|
||||
{
|
||||
return _hypercall2(int, set_debugreg, reg, value);
|
||||
}
|
||||
|
||||
static inline unsigned long
|
||||
HYPERVISOR_get_debugreg(int reg)
|
||||
{
|
||||
return _hypercall1(unsigned long, get_debugreg, reg);
|
||||
}
|
||||
|
||||
static inline int
|
||||
HYPERVISOR_update_descriptor(u64 ma, u64 desc)
|
||||
{
|
||||
return _hypercall4(int, update_descriptor, ma, ma>>32, desc, desc>>32);
|
||||
}
|
||||
|
||||
static inline int
|
||||
HYPERVISOR_memory_op(unsigned int cmd, void *arg)
|
||||
{
|
||||
return _hypercall2(int, memory_op, cmd, arg);
|
||||
}
|
||||
|
||||
static inline int
|
||||
HYPERVISOR_multicall(void *call_list, int nr_calls)
|
||||
{
|
||||
return _hypercall2(int, multicall, call_list, nr_calls);
|
||||
}
|
||||
|
||||
static inline int
|
||||
HYPERVISOR_update_va_mapping(unsigned long va, pte_t new_val,
|
||||
unsigned long flags)
|
||||
{
|
||||
unsigned long pte_hi = 0;
|
||||
#ifdef CONFIG_X86_PAE
|
||||
pte_hi = new_val.pte_high;
|
||||
#endif
|
||||
return _hypercall4(int, update_va_mapping, va,
|
||||
new_val.pte_low, pte_hi, flags);
|
||||
}
|
||||
|
||||
static inline int
|
||||
HYPERVISOR_event_channel_op(int cmd, void *arg)
|
||||
{
|
||||
int rc = _hypercall2(int, event_channel_op, cmd, arg);
|
||||
if (unlikely(rc == -ENOSYS)) {
|
||||
struct evtchn_op op;
|
||||
op.cmd = cmd;
|
||||
memcpy(&op.u, arg, sizeof(op.u));
|
||||
rc = _hypercall1(int, event_channel_op_compat, &op);
|
||||
memcpy(arg, &op.u, sizeof(op.u));
|
||||
}
|
||||
return rc;
|
||||
}
|
||||
|
||||
static inline int
|
||||
HYPERVISOR_xen_version(int cmd, void *arg)
|
||||
{
|
||||
return _hypercall2(int, xen_version, cmd, arg);
|
||||
}
|
||||
|
||||
static inline int
|
||||
HYPERVISOR_console_io(int cmd, int count, char *str)
|
||||
{
|
||||
return _hypercall3(int, console_io, cmd, count, str);
|
||||
}
|
||||
|
||||
static inline int
|
||||
HYPERVISOR_physdev_op(int cmd, void *arg)
|
||||
{
|
||||
int rc = _hypercall2(int, physdev_op, cmd, arg);
|
||||
if (unlikely(rc == -ENOSYS)) {
|
||||
struct physdev_op op;
|
||||
op.cmd = cmd;
|
||||
memcpy(&op.u, arg, sizeof(op.u));
|
||||
rc = _hypercall1(int, physdev_op_compat, &op);
|
||||
memcpy(arg, &op.u, sizeof(op.u));
|
||||
}
|
||||
return rc;
|
||||
}
|
||||
|
||||
static inline int
|
||||
HYPERVISOR_grant_table_op(unsigned int cmd, void *uop, unsigned int count)
|
||||
{
|
||||
return _hypercall3(int, grant_table_op, cmd, uop, count);
|
||||
}
|
||||
|
||||
static inline int
|
||||
HYPERVISOR_update_va_mapping_otherdomain(unsigned long va, pte_t new_val,
|
||||
unsigned long flags, domid_t domid)
|
||||
{
|
||||
unsigned long pte_hi = 0;
|
||||
#ifdef CONFIG_X86_PAE
|
||||
pte_hi = new_val.pte_high;
|
||||
#endif
|
||||
return _hypercall5(int, update_va_mapping_otherdomain, va,
|
||||
new_val.pte_low, pte_hi, flags, domid);
|
||||
}
|
||||
|
||||
static inline int
|
||||
HYPERVISOR_vm_assist(unsigned int cmd, unsigned int type)
|
||||
{
|
||||
return _hypercall2(int, vm_assist, cmd, type);
|
||||
}
|
||||
|
||||
static inline int
|
||||
HYPERVISOR_vcpu_op(int cmd, int vcpuid, void *extra_args)
|
||||
{
|
||||
return _hypercall3(int, vcpu_op, cmd, vcpuid, extra_args);
|
||||
}
|
||||
|
||||
static inline int
|
||||
HYPERVISOR_suspend(unsigned long srec)
|
||||
{
|
||||
return _hypercall3(int, sched_op, SCHEDOP_shutdown,
|
||||
SHUTDOWN_suspend, srec);
|
||||
}
|
||||
|
||||
static inline int
|
||||
HYPERVISOR_nmi_op(unsigned long op, unsigned long arg)
|
||||
{
|
||||
return _hypercall2(int, nmi_op, op, arg);
|
||||
}
|
||||
|
||||
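/*
 * The MULTI_*() helpers below only fill in a struct multicall_entry;
 * the batched entries are issued later via HYPERVISOR_multicall().
 */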
static inline void
|
||||
MULTI_update_va_mapping(struct multicall_entry *mcl, unsigned long va,
|
||||
pte_t new_val, unsigned long flags)
|
||||
{
|
||||
mcl->op = __HYPERVISOR_update_va_mapping;
|
||||
mcl->args[0] = va;
|
||||
#ifdef CONFIG_X86_PAE
|
||||
mcl->args[1] = new_val.pte_low;
|
||||
mcl->args[2] = new_val.pte_high;
|
||||
#else
|
||||
mcl->args[1] = new_val.pte_low;
|
||||
mcl->args[2] = 0;
|
||||
#endif
|
||||
mcl->args[3] = flags;
|
||||
}
|
||||
|
||||
static inline void
|
||||
MULTI_grant_table_op(struct multicall_entry *mcl, unsigned int cmd,
|
||||
void *uop, unsigned int count)
|
||||
{
|
||||
mcl->op = __HYPERVISOR_grant_table_op;
|
||||
mcl->args[0] = cmd;
|
||||
mcl->args[1] = (unsigned long)uop;
|
||||
mcl->args[2] = count;
|
||||
}
|
||||
|
||||
static inline void
|
||||
MULTI_update_va_mapping_otherdomain(struct multicall_entry *mcl, unsigned long va,
|
||||
pte_t new_val, unsigned long flags,
|
||||
domid_t domid)
|
||||
{
|
||||
mcl->op = __HYPERVISOR_update_va_mapping_otherdomain;
|
||||
mcl->args[0] = va;
|
||||
#ifdef CONFIG_X86_PAE
|
||||
mcl->args[1] = new_val.pte_low;
|
||||
mcl->args[2] = new_val.pte_high;
|
||||
#else
|
||||
mcl->args[1] = new_val.pte_low;
|
||||
mcl->args[2] = 0;
|
||||
#endif
|
||||
mcl->args[3] = flags;
|
||||
mcl->args[4] = domid;
|
||||
}
|
||||
|
||||
static inline void
|
||||
MULTI_update_descriptor(struct multicall_entry *mcl, u64 maddr,
|
||||
struct desc_struct desc)
|
||||
{
|
||||
mcl->op = __HYPERVISOR_update_descriptor;
|
||||
mcl->args[0] = maddr;
|
||||
mcl->args[1] = maddr >> 32;
|
||||
mcl->args[2] = desc.a;
|
||||
mcl->args[3] = desc.b;
|
||||
}
|
||||
|
||||
static inline void
|
||||
MULTI_memory_op(struct multicall_entry *mcl, unsigned int cmd, void *arg)
|
||||
{
|
||||
mcl->op = __HYPERVISOR_memory_op;
|
||||
mcl->args[0] = cmd;
|
||||
mcl->args[1] = (unsigned long)arg;
|
||||
}
|
||||
|
||||
static inline void
|
||||
MULTI_mmu_update(struct multicall_entry *mcl, struct mmu_update *req,
|
||||
int count, int *success_count, domid_t domid)
|
||||
{
|
||||
mcl->op = __HYPERVISOR_mmu_update;
|
||||
mcl->args[0] = (unsigned long)req;
|
||||
mcl->args[1] = count;
|
||||
mcl->args[2] = (unsigned long)success_count;
|
||||
mcl->args[3] = domid;
|
||||
}
|
||||
|
||||
static inline void
|
||||
MULTI_mmuext_op(struct multicall_entry *mcl, struct mmuext_op *op, int count,
|
||||
int *success_count, domid_t domid)
|
||||
{
|
||||
mcl->op = __HYPERVISOR_mmuext_op;
|
||||
mcl->args[0] = (unsigned long)op;
|
||||
mcl->args[1] = count;
|
||||
mcl->args[2] = (unsigned long)success_count;
|
||||
mcl->args[3] = domid;
|
||||
}
|
||||
|
||||
static inline void
|
||||
MULTI_set_gdt(struct multicall_entry *mcl, unsigned long *frames, int entries)
|
||||
{
|
||||
mcl->op = __HYPERVISOR_set_gdt;
|
||||
mcl->args[0] = (unsigned long)frames;
|
||||
mcl->args[1] = entries;
|
||||
}
|
||||
|
||||
static inline void
|
||||
MULTI_stack_switch(struct multicall_entry *mcl,
|
||||
unsigned long ss, unsigned long esp)
|
||||
{
|
||||
mcl->op = __HYPERVISOR_stack_switch;
|
||||
mcl->args[0] = ss;
|
||||
mcl->args[1] = esp;
|
||||
}
|
||||
|
||||
#endif /* __HYPERCALL_H__ */
@ -0,0 +1,73 @@
/******************************************************************************
|
||||
* hypervisor.h
|
||||
*
|
||||
* Linux-specific hypervisor handling.
|
||||
*
|
||||
* Copyright (c) 2002-2004, K A Fraser
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License version 2
|
||||
* as published by the Free Software Foundation; or, when distributed
|
||||
* separately from the Linux kernel or incorporated into other
|
||||
* software packages, subject to the following license:
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this source file (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use, copy, modify,
|
||||
* merge, publish, distribute, sublicense, and/or sell copies of the Software,
|
||||
* and to permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef __HYPERVISOR_H__
|
||||
#define __HYPERVISOR_H__
|
||||
|
||||
#include <linux/types.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/version.h>
|
||||
|
||||
#include <xen/interface/xen.h>
|
||||
#include <xen/interface/version.h>
|
||||
|
||||
#include <asm/ptrace.h>
|
||||
#include <asm/page.h>
|
||||
#include <asm/desc.h>
|
||||
#if defined(__i386__)
|
||||
# ifdef CONFIG_X86_PAE
|
||||
# include <asm-generic/pgtable-nopud.h>
|
||||
# else
|
||||
# include <asm-generic/pgtable-nopmd.h>
|
||||
# endif
|
||||
#endif
|
||||
#include <asm/xen/hypercall.h>
|
||||
|
||||
/* arch/i386/kernel/setup.c */
|
||||
extern struct shared_info *HYPERVISOR_shared_info;
|
||||
extern struct start_info *xen_start_info;
|
||||
#define is_initial_xendomain() (xen_start_info->flags & SIF_INITDOMAIN)
|
||||
|
||||
/* arch/i386/mach-xen/evtchn.c */
|
||||
/* Force a proper event-channel callback from Xen. */
|
||||
extern void force_evtchn_callback(void);
|
||||
|
||||
/* Turn jiffies into Xen system time. */
|
||||
u64 jiffies_to_st(unsigned long jiffies);
|
||||
|
||||
|
||||
#define MULTI_UVMFLAGS_INDEX 3
|
||||
#define MULTI_UVMDOMID_INDEX 4
|
||||
|
||||
#define is_running_on_xen() (xen_start_info ? 1 : 0)
|
||||
|
||||
#endif /* __HYPERVISOR_H__ */
@ -0,0 +1,188 @@
/******************************************************************************
|
||||
* arch-x86_32.h
|
||||
*
|
||||
* Guest OS interface to x86 32-bit Xen.
|
||||
*
|
||||
* Copyright (c) 2004, K A Fraser
|
||||
*/
|
||||
|
||||
#ifndef __XEN_PUBLIC_ARCH_X86_32_H__
|
||||
#define __XEN_PUBLIC_ARCH_X86_32_H__
|
||||
|
||||
#ifdef __XEN__
|
||||
#define __DEFINE_GUEST_HANDLE(name, type) \
|
||||
typedef struct { type *p; } __guest_handle_ ## name
|
||||
#else
|
||||
#define __DEFINE_GUEST_HANDLE(name, type) \
|
||||
typedef type * __guest_handle_ ## name
|
||||
#endif
|
||||
|
||||
#define DEFINE_GUEST_HANDLE_STRUCT(name) \
|
||||
__DEFINE_GUEST_HANDLE(name, struct name)
|
||||
#define DEFINE_GUEST_HANDLE(name) __DEFINE_GUEST_HANDLE(name, name)
|
||||
#define GUEST_HANDLE(name) __guest_handle_ ## name
|
||||
|
||||
#ifndef __ASSEMBLY__
|
||||
/* Guest handles for primitive C types. */
|
||||
__DEFINE_GUEST_HANDLE(uchar, unsigned char);
|
||||
__DEFINE_GUEST_HANDLE(uint, unsigned int);
|
||||
__DEFINE_GUEST_HANDLE(ulong, unsigned long);
|
||||
DEFINE_GUEST_HANDLE(char);
|
||||
DEFINE_GUEST_HANDLE(int);
|
||||
DEFINE_GUEST_HANDLE(long);
|
||||
DEFINE_GUEST_HANDLE(void);
|
||||
#endif
|
||||
|
||||
/*
|
||||
* SEGMENT DESCRIPTOR TABLES
|
||||
*/
|
||||
/*
|
||||
* A number of GDT entries are reserved by Xen. These are not situated at the
|
||||
* start of the GDT because some stupid OSes export hard-coded selector values
|
||||
* in their ABI. These hard-coded values are always near the start of the GDT,
|
||||
* so Xen places itself out of the way, at the far end of the GDT.
|
||||
*/
|
||||
#define FIRST_RESERVED_GDT_PAGE 14
|
||||
#define FIRST_RESERVED_GDT_BYTE (FIRST_RESERVED_GDT_PAGE * 4096)
|
||||
#define FIRST_RESERVED_GDT_ENTRY (FIRST_RESERVED_GDT_BYTE / 8)
|
||||
|
||||
/*
|
||||
* These flat segments are in the Xen-private section of every GDT. Since these
|
||||
* are also present in the initial GDT, many OSes will be able to avoid
|
||||
* installing their own GDT.
|
||||
*/
|
||||
#define FLAT_RING1_CS 0xe019 /* GDT index 259 */
|
||||
#define FLAT_RING1_DS 0xe021 /* GDT index 260 */
|
||||
#define FLAT_RING1_SS 0xe021 /* GDT index 260 */
|
||||
#define FLAT_RING3_CS 0xe02b /* GDT index 261 */
|
||||
#define FLAT_RING3_DS 0xe033 /* GDT index 262 */
|
||||
#define FLAT_RING3_SS 0xe033 /* GDT index 262 */
|
||||
|
||||
#define FLAT_KERNEL_CS FLAT_RING1_CS
|
||||
#define FLAT_KERNEL_DS FLAT_RING1_DS
|
||||
#define FLAT_KERNEL_SS FLAT_RING1_SS
|
||||
#define FLAT_USER_CS FLAT_RING3_CS
|
||||
#define FLAT_USER_DS FLAT_RING3_DS
|
||||
#define FLAT_USER_SS FLAT_RING3_SS
|
||||
|
||||
/* And the trap vector is... */
|
||||
#define TRAP_INSTR "int $0x82"
|
||||
|
||||
/*
|
||||
* Virtual addresses beyond this are not modifiable by guest OSes. The
|
||||
* machine->physical mapping table starts at this address, read-only.
|
||||
*/
|
||||
#ifdef CONFIG_X86_PAE
|
||||
#define __HYPERVISOR_VIRT_START 0xF5800000
|
||||
#else
|
||||
#define __HYPERVISOR_VIRT_START 0xFC000000
|
||||
#endif
|
||||
|
||||
#ifndef HYPERVISOR_VIRT_START
|
||||
#define HYPERVISOR_VIRT_START mk_unsigned_long(__HYPERVISOR_VIRT_START)
|
||||
#endif
|
||||
|
||||
#ifndef machine_to_phys_mapping
|
||||
#define machine_to_phys_mapping ((unsigned long *)HYPERVISOR_VIRT_START)
|
||||
#endif
|
||||
|
||||
/* Maximum number of virtual CPUs in multi-processor guests. */
|
||||
#define MAX_VIRT_CPUS 32
|
||||
|
||||
#ifndef __ASSEMBLY__
|
||||
|
||||
/*
|
||||
* Send an array of these to HYPERVISOR_set_trap_table()
|
||||
*/
|
||||
#define TI_GET_DPL(_ti) ((_ti)->flags & 3)
|
||||
#define TI_GET_IF(_ti) ((_ti)->flags & 4)
|
||||
#define TI_SET_DPL(_ti, _dpl) ((_ti)->flags |= (_dpl))
|
||||
#define TI_SET_IF(_ti, _if) ((_ti)->flags |= ((!!(_if))<<2))
|
||||
|
||||
struct trap_info {
|
||||
uint8_t vector; /* exception vector */
|
||||
uint8_t flags; /* 0-3: privilege level; 4: clear event enable? */
|
||||
uint16_t cs; /* code selector */
|
||||
unsigned long address; /* code offset */
|
||||
};
|
||||
DEFINE_GUEST_HANDLE_STRUCT(trap_info);
|
||||
|
||||
struct cpu_user_regs {
|
||||
uint32_t ebx;
|
||||
uint32_t ecx;
|
||||
uint32_t edx;
|
||||
uint32_t esi;
|
||||
uint32_t edi;
|
||||
uint32_t ebp;
|
||||
uint32_t eax;
|
||||
uint16_t error_code; /* private */
|
||||
uint16_t entry_vector; /* private */
|
||||
uint32_t eip;
|
||||
uint16_t cs;
|
||||
uint8_t saved_upcall_mask;
|
||||
uint8_t _pad0;
|
||||
uint32_t eflags; /* eflags.IF == !saved_upcall_mask */
|
||||
uint32_t esp;
|
||||
uint16_t ss, _pad1;
|
||||
uint16_t es, _pad2;
|
||||
uint16_t ds, _pad3;
|
||||
uint16_t fs, _pad4;
|
||||
uint16_t gs, _pad5;
|
||||
};
|
||||
DEFINE_GUEST_HANDLE_STRUCT(cpu_user_regs);
|
||||
|
||||
typedef uint64_t tsc_timestamp_t; /* RDTSC timestamp */
|
||||
|
||||
/*
|
||||
* The following is all CPU context. Note that the fpu_ctxt block is filled
|
||||
* in by FXSAVE if the CPU has feature FXSR; otherwise FSAVE is used.
|
||||
*/
|
||||
struct vcpu_guest_context {
|
||||
/* FPU registers come first so they can be aligned for FXSAVE/FXRSTOR. */
|
||||
struct { char x[512]; } fpu_ctxt; /* User-level FPU registers */
|
||||
#define VGCF_I387_VALID (1<<0)
|
||||
#define VGCF_HVM_GUEST (1<<1)
|
||||
#define VGCF_IN_KERNEL (1<<2)
|
||||
unsigned long flags; /* VGCF_* flags */
|
||||
struct cpu_user_regs user_regs; /* User-level CPU registers */
|
||||
struct trap_info trap_ctxt[256]; /* Virtual IDT */
|
||||
unsigned long ldt_base, ldt_ents; /* LDT (linear address, # ents) */
|
||||
unsigned long gdt_frames[16], gdt_ents; /* GDT (machine frames, # ents) */
|
||||
unsigned long kernel_ss, kernel_sp; /* Virtual TSS (only SS1/SP1) */
|
||||
unsigned long ctrlreg[8]; /* CR0-CR7 (control registers) */
|
||||
unsigned long debugreg[8]; /* DB0-DB7 (debug registers) */
|
||||
unsigned long event_callback_cs; /* CS:EIP of event callback */
|
||||
unsigned long event_callback_eip;
|
||||
unsigned long failsafe_callback_cs; /* CS:EIP of failsafe callback */
|
||||
unsigned long failsafe_callback_eip;
|
||||
unsigned long vm_assist; /* VMASST_TYPE_* bitmap */
|
||||
};
|
||||
DEFINE_GUEST_HANDLE_STRUCT(vcpu_guest_context);
|
||||
|
||||
struct arch_shared_info {
|
||||
unsigned long max_pfn; /* max pfn that appears in table */
|
||||
/* Frame containing list of mfns containing list of mfns containing p2m. */
|
||||
unsigned long pfn_to_mfn_frame_list_list;
|
||||
unsigned long nmi_reason;
|
||||
};
|
||||
|
||||
struct arch_vcpu_info {
|
||||
unsigned long cr2;
|
||||
unsigned long pad[5]; /* sizeof(struct vcpu_info) == 64 */
|
||||
};
|
||||
|
||||
#endif /* !__ASSEMBLY__ */
|
||||
|
||||
/*
|
||||
* Prefix forces emulation of some non-trapping instructions.
|
||||
* Currently only CPUID.
|
||||
*/
|
||||
#ifdef __ASSEMBLY__
|
||||
#define XEN_EMULATE_PREFIX .byte 0x0f,0x0b,0x78,0x65,0x6e ;
|
||||
#define XEN_CPUID XEN_EMULATE_PREFIX cpuid
|
||||
#else
|
||||
#define XEN_EMULATE_PREFIX ".byte 0x0f,0x0b,0x78,0x65,0x6e ; "
|
||||
#define XEN_CPUID XEN_EMULATE_PREFIX "cpuid"
|
||||
#endif
|
||||
|
||||
#endif
|
|
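Purely as an illustration of the trap-table types above (not from the patch; the vector, DPL value and handler symbol are made up), one entry destined for HYPERVISOR_set_trap_table() could be built like this:

extern void example_int80_handler(void);	/* hypothetical handler symbol */

/* Hypothetical example: one virtual IDT entry, callable from ring 3. */
static struct trap_info example_traps[] = {
	{ .vector = 0x80, .flags = 3 /* DPL 3 */, .cs = FLAT_KERNEL_CS,
	  .address = (unsigned long)example_int80_handler },
	{ 0 },	/* a zeroed entry terminates the array */
};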
@@ -38,17 +38,25 @@
 * e.g. ELFNOTE(XYZCo, 42, .asciz, "forty-two")
 * ELFNOTE(XYZCo, 12, .long, 0xdeadbeef)
 */
#define ELFNOTE(name, type, desctype, descdata) \
	.pushsection .note.name, "",@note ; \
	.align 4 ; \
#define ELFNOTE_START(name, type, flags) \
	.pushsection .note.name, flags,@note ; \
	.balign 4 ; \
	.long 2f - 1f /* namesz */ ; \
	.long 4f - 3f /* descsz */ ; \
	.long 4484f - 3f /* descsz */ ; \
	.long type ; \
1:	.asciz #name ; \
2:	.align 4 ; \
3:	desctype descdata ; \
4:	.align 4 ; \
2:	.balign 4 ; \
3:

#define ELFNOTE_END \
4484:	.balign 4 ; \
	.popsection ;

#define ELFNOTE(name, type, desc) \
	ELFNOTE_START(name, type, "") \
	desc ; \
	ELFNOTE_END

#else /* !__ASSEMBLER__ */
#include <linux/elf.h>
/*
@@ -36,13 +36,57 @@ static inline int request_module(const char * name, ...) { return -ENOSYS; }
#define try_then_request_module(x, mod...) ((x) ?: (request_module(mod), (x)))

struct key;
extern int call_usermodehelper_keys(char *path, char *argv[], char *envp[],
	struct key *session_keyring, int wait);
struct file;
struct subprocess_info;

/* Allocate a subprocess_info structure */
struct subprocess_info *call_usermodehelper_setup(char *path,
	char **argv, char **envp);

/* Set various pieces of state into the subprocess_info structure */
void call_usermodehelper_setkeys(struct subprocess_info *info,
	struct key *session_keyring);
int call_usermodehelper_stdinpipe(struct subprocess_info *sub_info,
	struct file **filp);
void call_usermodehelper_setcleanup(struct subprocess_info *info,
	void (*cleanup)(char **argv, char **envp));

enum umh_wait {
	UMH_NO_WAIT = -1,	/* don't wait at all */
	UMH_WAIT_EXEC = 0,	/* wait for the exec, but not the process */
	UMH_WAIT_PROC = 1,	/* wait for the process to complete */
};

/* Actually execute the sub-process */
int call_usermodehelper_exec(struct subprocess_info *info, enum umh_wait wait);

/* Free the subprocess_info. This is only needed if you're not going
   to call call_usermodehelper_exec */
void call_usermodehelper_freeinfo(struct subprocess_info *info);

static inline int
call_usermodehelper(char *path, char **argv, char **envp, int wait)
call_usermodehelper(char *path, char **argv, char **envp, enum umh_wait wait)
{
	return call_usermodehelper_keys(path, argv, envp, NULL, wait);
	struct subprocess_info *info;

	info = call_usermodehelper_setup(path, argv, envp);
	if (info == NULL)
		return -ENOMEM;
	return call_usermodehelper_exec(info, wait);
}

static inline int
call_usermodehelper_keys(char *path, char **argv, char **envp,
	struct key *session_keyring, enum umh_wait wait)
{
	struct subprocess_info *info;

	info = call_usermodehelper_setup(path, argv, envp);
	if (info == NULL)
		return -ENOMEM;

	call_usermodehelper_setkeys(info, session_keyring);
	return call_usermodehelper_exec(info, wait);
}

extern void usermodehelper_init(void);
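For context (again not part of the diff), a sketch of how a caller might drive the new split helper API directly; the helper path and arguments are invented:

/* Hypothetical example: run /sbin/example-helper and wait for it to exit. */
static int run_example_helper(void)
{
	char *argv[] = { "/sbin/example-helper", "--ping", NULL };
	char *envp[] = { "HOME=/", "PATH=/sbin:/bin", NULL };
	struct subprocess_info *info;

	info = call_usermodehelper_setup(argv[0], argv, envp);
	if (info == NULL)
		return -ENOMEM;

	/* equivalent one-liner: call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC) */
	return call_usermodehelper_exec(info, UMH_WAIT_PROC);
}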
@@ -158,6 +158,8 @@
#define VXSPEC_MAJOR 200 /* VERITAS volume config driver */
#define VXDMP_MAJOR 201 /* VERITAS volume multipath driver */

#define XENVBD_MAJOR 202 /* Xen virtual block device */

#define MSR_MAJOR 202
#define CPUID_MAJOR 203
@@ -92,6 +92,7 @@

/* PG_owner_priv_1 users should have descriptive aliases */
#define PG_checked PG_owner_priv_1 /* Used by some filesystems */
#define PG_pinned PG_owner_priv_1 /* Xen pinned pagetable */

#if (BITS_PER_LONG > 32)
/*
@@ -170,6 +171,10 @@ static inline void SetPageUptodate(struct page *page)
#define SetPageChecked(page) set_bit(PG_checked, &(page)->flags)
#define ClearPageChecked(page) clear_bit(PG_checked, &(page)->flags)

#define PagePinned(page) test_bit(PG_pinned, &(page)->flags)
#define SetPagePinned(page) set_bit(PG_pinned, &(page)->flags)
#define ClearPagePinned(page) clear_bit(PG_pinned, &(page)->flags)

#define PageReserved(page) test_bit(PG_reserved, &(page)->flags)
#define SetPageReserved(page) set_bit(PG_reserved, &(page)->flags)
#define ClearPageReserved(page) clear_bit(PG_reserved, &(page)->flags)
@@ -67,6 +67,11 @@ extern void kernel_power_off(void);

void ctrl_alt_del(void);

#define POWEROFF_CMD_PATH_LEN 256
extern char poweroff_cmd[POWEROFF_CMD_PATH_LEN];

extern int orderly_poweroff(bool force);

/*
 * Emergency restart, callable from an interrupt handler.
 */
@@ -105,8 +105,12 @@ extern void * memchr(const void *,int,__kernel_size_t);
#endif

extern char *kstrdup(const char *s, gfp_t gfp);
extern char *kstrndup(const char *s, size_t len, gfp_t gfp);
extern void *kmemdup(const void *src, size_t len, gfp_t gfp);

extern char **argv_split(gfp_t gfp, const char *str, int *argcp);
extern void argv_free(char **argv);

#ifdef __cplusplus
}
#endif
@@ -70,6 +70,10 @@ extern int map_vm_area(struct vm_struct *area, pgprot_t prot,
			struct page ***pages);
extern void unmap_kernel_range(unsigned long addr, unsigned long size);

/* Allocate/destroy a 'vmalloc' VM area. */
extern struct vm_struct *alloc_vm_area(size_t size);
extern void free_vm_area(struct vm_struct *area);

/*
 * Internals. Dont't use..
 */
@@ -0,0 +1,48 @@
#ifndef _XEN_EVENTS_H
#define _XEN_EVENTS_H

#include <linux/interrupt.h>

#include <xen/interface/event_channel.h>
#include <asm/xen/hypercall.h>

enum ipi_vector {
	XEN_RESCHEDULE_VECTOR,
	XEN_CALL_FUNCTION_VECTOR,

	XEN_NR_IPIS,
};

int bind_evtchn_to_irq(unsigned int evtchn);
int bind_evtchn_to_irqhandler(unsigned int evtchn,
			irq_handler_t handler,
			unsigned long irqflags, const char *devname,
			void *dev_id);
int bind_virq_to_irqhandler(unsigned int virq, unsigned int cpu,
			irq_handler_t handler,
			unsigned long irqflags, const char *devname,
			void *dev_id);
int bind_ipi_to_irqhandler(enum ipi_vector ipi,
			unsigned int cpu,
			irq_handler_t handler,
			unsigned long irqflags,
			const char *devname,
			void *dev_id);

/*
 * Common unbind function for all event sources. Takes IRQ to unbind from.
 * Automatically closes the underlying event channel (even for bindings
 * made with bind_evtchn_to_irqhandler()).
 */
void unbind_from_irqhandler(unsigned int irq, void *dev_id);

void xen_send_IPI_one(unsigned int cpu, enum ipi_vector vector);

static inline void notify_remote_via_evtchn(int port)
{
	struct evtchn_send send = { .port = port };
	(void)HYPERVISOR_event_channel_op(EVTCHNOP_send, &send);
}

extern void notify_remote_via_irq(int irq);
#endif /* _XEN_EVENTS_H */
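A sketch (not part of the patch; the handler name, device string and IRQF_DISABLED choice are illustrative) of how a driver might bind a VIRQ on CPU 0 with this API:

/* Hypothetical example: bind a virtual IRQ and handle its events. */
static irqreturn_t example_virq_interrupt(int irq, void *dev_id)
{
	/* acknowledge / process the event here */
	return IRQ_HANDLED;
}

static int example_bind(unsigned int virq)
{
	int irq;

	irq = bind_virq_to_irqhandler(virq, 0, example_virq_interrupt,
				      IRQF_DISABLED, "example-virq", NULL);
	if (irq < 0)
		return irq;
	/* later: unbind_from_irqhandler(irq, NULL); */
	return 0;
}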
@@ -0,0 +1,23 @@
/******************************************************************************
 * features.h
 *
 * Query the features reported by Xen.
 *
 * Copyright (c) 2006, Ian Campbell
 */

#ifndef __XEN_FEATURES_H__
#define __XEN_FEATURES_H__

#include <xen/interface/features.h>

void xen_setup_features(void);

extern u8 xen_features[XENFEAT_NR_SUBMAPS * 32];

static inline int xen_feature(int flag)
{
	return xen_features[flag];
}

#endif /* __ASM_XEN_FEATURES_H__ */
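For illustration only, how code elsewhere might consult a feature bit once xen_setup_features() has run; the function name and the feature chosen are arbitrary:

/* Hypothetical example: pick a pagetable update strategy at init time. */
static void __init example_choose_pgtable_mode(void)
{
	xen_setup_features();

	if (xen_feature(XENFEAT_writable_page_tables))
		printk(KERN_INFO "Xen: pagetables are directly writable\n");
	else
		printk(KERN_INFO "Xen: pagetable updates go via hypercalls\n");
}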
@@ -0,0 +1,107 @@
/******************************************************************************
 * grant_table.h
 *
 * Two sets of functionality:
 * 1. Granting foreign access to our memory reservation.
 * 2. Accessing others' memory reservations via grant references.
 * (i.e., mechanisms for both sender and recipient of grant references)
 *
 * Copyright (c) 2004-2005, K A Fraser
 * Copyright (c) 2005, Christopher Clark
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License version 2
 * as published by the Free Software Foundation; or, when distributed
 * separately from the Linux kernel or incorporated into other
 * software packages, subject to the following license:
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this source file (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy, modify,
 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
 * and to permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#ifndef __ASM_GNTTAB_H__
#define __ASM_GNTTAB_H__

#include <asm/xen/hypervisor.h>
#include <xen/interface/grant_table.h>

/* NR_GRANT_FRAMES must be less than or equal to that configured in Xen */
#define NR_GRANT_FRAMES 4

struct gnttab_free_callback {
	struct gnttab_free_callback *next;
	void (*fn)(void *);
	void *arg;
	u16 count;
};

int gnttab_grant_foreign_access(domid_t domid, unsigned long frame,
				int readonly);

/*
 * End access through the given grant reference, iff the grant entry is no
 * longer in use. Return 1 if the grant entry was freed, 0 if it is still in
 * use.
 */
int gnttab_end_foreign_access_ref(grant_ref_t ref, int readonly);

/*
 * Eventually end access through the given grant reference, and once that
 * access has been ended, free the given page too. Access will be ended
 * immediately iff the grant entry is not in use, otherwise it will happen
 * some time later. page may be 0, in which case no freeing will occur.
 */
void gnttab_end_foreign_access(grant_ref_t ref, int readonly,
			       unsigned long page);

int gnttab_grant_foreign_transfer(domid_t domid, unsigned long pfn);

unsigned long gnttab_end_foreign_transfer_ref(grant_ref_t ref);
unsigned long gnttab_end_foreign_transfer(grant_ref_t ref);

int gnttab_query_foreign_access(grant_ref_t ref);

/*
 * operations on reserved batches of grant references
 */
int gnttab_alloc_grant_references(u16 count, grant_ref_t *pprivate_head);

void gnttab_free_grant_reference(grant_ref_t ref);

void gnttab_free_grant_references(grant_ref_t head);

int gnttab_empty_grant_references(const grant_ref_t *pprivate_head);

int gnttab_claim_grant_reference(grant_ref_t *pprivate_head);

void gnttab_release_grant_reference(grant_ref_t *private_head,
				    grant_ref_t release);

void gnttab_request_free_callback(struct gnttab_free_callback *callback,
				  void (*fn)(void *), void *arg, u16 count);
void gnttab_cancel_free_callback(struct gnttab_free_callback *callback);

void gnttab_grant_foreign_access_ref(grant_ref_t ref, domid_t domid,
				     unsigned long frame, int readonly);

void gnttab_grant_foreign_transfer_ref(grant_ref_t, domid_t domid,
				       unsigned long pfn);

#define gnttab_map_vaddr(map) ((void *)(map.host_virt_addr))

#endif /* __ASM_GNTTAB_H__ */
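A rough sketch (not from the patch; error handling is trimmed, backend_domid is a placeholder, and pfn_to_mfn() is assumed to come from the Xen page helpers) of granting a backend read-only access to one of our pages:

/* Hypothetical example: share one page read-only with a backend domain. */
static int example_share_page(domid_t backend_domid, struct page *page)
{
	unsigned long frame = pfn_to_mfn(page_to_pfn(page));
	int ref;

	ref = gnttab_grant_foreign_access(backend_domid, frame, 1 /* readonly */);
	if (ref < 0)
		return ref;

	/* Hand "ref" to the backend (e.g. via xenbus); tear down later with
	 * gnttab_end_foreign_access(ref, 1, 0); */
	return ref;
}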
@@ -0,0 +1,6 @@
#ifndef XEN_HVC_CONSOLE_H
#define XEN_HVC_CONSOLE_H

extern struct console xenboot_console;

#endif /* XEN_HVC_CONSOLE_H */
@@ -0,0 +1,133 @@
/******************************************************************************
 * elfnote.h
 *
 * Definitions used for the Xen ELF notes.
 *
 * Copyright (c) 2006, Ian Campbell, XenSource Ltd.
 */

#ifndef __XEN_PUBLIC_ELFNOTE_H__
#define __XEN_PUBLIC_ELFNOTE_H__

/*
 * The notes should live in a SHT_NOTE segment and have "Xen" in the
 * name field.
 *
 * Numeric types are either 4 or 8 bytes depending on the content of
 * the desc field.
 *
 * LEGACY indicated the fields in the legacy __xen_guest string which
 * this a note type replaces.
 */

/*
 * NAME=VALUE pair (string).
 *
 * LEGACY: FEATURES and PAE
 */
#define XEN_ELFNOTE_INFO 0

/*
 * The virtual address of the entry point (numeric).
 *
 * LEGACY: VIRT_ENTRY
 */
#define XEN_ELFNOTE_ENTRY 1

/* The virtual address of the hypercall transfer page (numeric).
 *
 * LEGACY: HYPERCALL_PAGE. (n.b. legacy value is a physical page
 * number not a virtual address)
 */
#define XEN_ELFNOTE_HYPERCALL_PAGE 2

/* The virtual address where the kernel image should be mapped (numeric).
 *
 * Defaults to 0.
 *
 * LEGACY: VIRT_BASE
 */
#define XEN_ELFNOTE_VIRT_BASE 3

/*
 * The offset of the ELF paddr field from the acutal required
 * psuedo-physical address (numeric).
 *
 * This is used to maintain backwards compatibility with older kernels
 * which wrote __PAGE_OFFSET into that field. This field defaults to 0
 * if not present.
 *
 * LEGACY: ELF_PADDR_OFFSET. (n.b. legacy default is VIRT_BASE)
 */
#define XEN_ELFNOTE_PADDR_OFFSET 4

/*
 * The version of Xen that we work with (string).
 *
 * LEGACY: XEN_VER
 */
#define XEN_ELFNOTE_XEN_VERSION 5

/*
 * The name of the guest operating system (string).
 *
 * LEGACY: GUEST_OS
 */
#define XEN_ELFNOTE_GUEST_OS 6

/*
 * The version of the guest operating system (string).
 *
 * LEGACY: GUEST_VER
 */
#define XEN_ELFNOTE_GUEST_VERSION 7

/*
 * The loader type (string).
 *
 * LEGACY: LOADER
 */
#define XEN_ELFNOTE_LOADER 8

/*
 * The kernel supports PAE (x86/32 only, string = "yes" or "no").
 *
 * LEGACY: PAE (n.b. The legacy interface included a provision to
 * indicate 'extended-cr3' support allowing L3 page tables to be
 * placed above 4G. It is assumed that any kernel new enough to use
 * these ELF notes will include this and therefore "yes" here is
 * equivalent to "yes[entended-cr3]" in the __xen_guest interface.
 */
#define XEN_ELFNOTE_PAE_MODE 9

/*
 * The features supported/required by this kernel (string).
 *
 * The string must consist of a list of feature names (as given in
 * features.h, without the "XENFEAT_" prefix) separated by '|'
 * characters. If a feature is required for the kernel to function
 * then the feature name must be preceded by a '!' character.
 *
 * LEGACY: FEATURES
 */
#define XEN_ELFNOTE_FEATURES 10

/*
 * The kernel requires the symbol table to be loaded (string = "yes" or "no")
 * LEGACY: BSD_SYMTAB (n.b. The legacy treated the presence or absence
 * of this string as a boolean flag rather than requiring "yes" or
 * "no".
 */
#define XEN_ELFNOTE_BSD_SYMTAB 11

#endif /* __XEN_PUBLIC_ELFNOTE_H__ */

/*
 * Local variables:
 * mode: C
 * c-set-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */
@@ -0,0 +1,195 @@
/******************************************************************************
 * event_channel.h
 *
 * Event channels between domains.
 *
 * Copyright (c) 2003-2004, K A Fraser.
 */

#ifndef __XEN_PUBLIC_EVENT_CHANNEL_H__
#define __XEN_PUBLIC_EVENT_CHANNEL_H__

typedef uint32_t evtchn_port_t;
DEFINE_GUEST_HANDLE(evtchn_port_t);

/*
 * EVTCHNOP_alloc_unbound: Allocate a port in domain <dom> and mark as
 * accepting interdomain bindings from domain <remote_dom>. A fresh port
 * is allocated in <dom> and returned as <port>.
 * NOTES:
 * 1. If the caller is unprivileged then <dom> must be DOMID_SELF.
 * 2. <rdom> may be DOMID_SELF, allowing loopback connections.
 */
#define EVTCHNOP_alloc_unbound 6
struct evtchn_alloc_unbound {
	/* IN parameters */
	domid_t dom, remote_dom;
	/* OUT parameters */
	evtchn_port_t port;
};

/*
 * EVTCHNOP_bind_interdomain: Construct an interdomain event channel between
 * the calling domain and <remote_dom>. <remote_dom,remote_port> must identify
 * a port that is unbound and marked as accepting bindings from the calling
 * domain. A fresh port is allocated in the calling domain and returned as
 * <local_port>.
 * NOTES:
 * 2. <remote_dom> may be DOMID_SELF, allowing loopback connections.
 */
#define EVTCHNOP_bind_interdomain 0
struct evtchn_bind_interdomain {
	/* IN parameters. */
	domid_t remote_dom;
	evtchn_port_t remote_port;
	/* OUT parameters. */
	evtchn_port_t local_port;
};

/*
 * EVTCHNOP_bind_virq: Bind a local event channel to VIRQ <irq> on specified
 * vcpu.
 * NOTES:
 * 1. A virtual IRQ may be bound to at most one event channel per vcpu.
 * 2. The allocated event channel is bound to the specified vcpu. The binding
 * may not be changed.
 */
#define EVTCHNOP_bind_virq 1
struct evtchn_bind_virq {
	/* IN parameters. */
	uint32_t virq;
	uint32_t vcpu;
	/* OUT parameters. */
	evtchn_port_t port;
};

/*
 * EVTCHNOP_bind_pirq: Bind a local event channel to PIRQ <irq>.
 * NOTES:
 * 1. A physical IRQ may be bound to at most one event channel per domain.
 * 2. Only a sufficiently-privileged domain may bind to a physical IRQ.
 */
#define EVTCHNOP_bind_pirq 2
struct evtchn_bind_pirq {
	/* IN parameters. */
	uint32_t pirq;
#define BIND_PIRQ__WILL_SHARE 1
	uint32_t flags; /* BIND_PIRQ__* */
	/* OUT parameters. */
	evtchn_port_t port;
};

/*
 * EVTCHNOP_bind_ipi: Bind a local event channel to receive events.
 * NOTES:
 * 1. The allocated event channel is bound to the specified vcpu. The binding
 * may not be changed.
 */
#define EVTCHNOP_bind_ipi 7
struct evtchn_bind_ipi {
	uint32_t vcpu;
	/* OUT parameters. */
	evtchn_port_t port;
};

/*
 * EVTCHNOP_close: Close a local event channel <port>. If the channel is
 * interdomain then the remote end is placed in the unbound state
 * (EVTCHNSTAT_unbound), awaiting a new connection.
 */
#define EVTCHNOP_close 3
struct evtchn_close {
	/* IN parameters. */
	evtchn_port_t port;
};

/*
 * EVTCHNOP_send: Send an event to the remote end of the channel whose local
 * endpoint is <port>.
 */
#define EVTCHNOP_send 4
struct evtchn_send {
	/* IN parameters. */
	evtchn_port_t port;
};

/*
 * EVTCHNOP_status: Get the current status of the communication channel which
 * has an endpoint at <dom, port>.
 * NOTES:
 * 1. <dom> may be specified as DOMID_SELF.
 * 2. Only a sufficiently-privileged domain may obtain the status of an event
 * channel for which <dom> is not DOMID_SELF.
 */
#define EVTCHNOP_status 5
struct evtchn_status {
	/* IN parameters */
	domid_t dom;
	evtchn_port_t port;
	/* OUT parameters */
#define EVTCHNSTAT_closed 0		/* Channel is not in use. */
#define EVTCHNSTAT_unbound 1		/* Channel is waiting interdom connection.*/
#define EVTCHNSTAT_interdomain 2	/* Channel is connected to remote domain. */
#define EVTCHNSTAT_pirq 3		/* Channel is bound to a phys IRQ line. */
#define EVTCHNSTAT_virq 4		/* Channel is bound to a virtual IRQ line */
#define EVTCHNSTAT_ipi 5		/* Channel is bound to a virtual IPI line */
	uint32_t status;
	uint32_t vcpu;			/* VCPU to which this channel is bound. */
	union {
		struct {
			domid_t dom;
		} unbound;		/* EVTCHNSTAT_unbound */
		struct {
			domid_t dom;
			evtchn_port_t port;
		} interdomain;		/* EVTCHNSTAT_interdomain */
		uint32_t pirq;		/* EVTCHNSTAT_pirq */
		uint32_t virq;		/* EVTCHNSTAT_virq */
	} u;
};

/*
 * EVTCHNOP_bind_vcpu: Specify which vcpu a channel should notify when an
 * event is pending.
 * NOTES:
 * 1. IPI- and VIRQ-bound channels always notify the vcpu that initialised
 * the binding. This binding cannot be changed.
 * 2. All other channels notify vcpu0 by default. This default is set when
 * the channel is allocated (a port that is freed and subsequently reused
 * has its binding reset to vcpu0).
 */
#define EVTCHNOP_bind_vcpu 8
struct evtchn_bind_vcpu {
	/* IN parameters. */
	evtchn_port_t port;
	uint32_t vcpu;
};

/*
 * EVTCHNOP_unmask: Unmask the specified local event-channel port and deliver
 * a notification to the appropriate VCPU if an event is pending.
 */
#define EVTCHNOP_unmask 9
struct evtchn_unmask {
	/* IN parameters. */
	evtchn_port_t port;
};

struct evtchn_op {
	uint32_t cmd; /* EVTCHNOP_* */
	union {
		struct evtchn_alloc_unbound alloc_unbound;
		struct evtchn_bind_interdomain bind_interdomain;
		struct evtchn_bind_virq bind_virq;
		struct evtchn_bind_pirq bind_pirq;
		struct evtchn_bind_ipi bind_ipi;
		struct evtchn_close close;
		struct evtchn_send send;
		struct evtchn_status status;
		struct evtchn_bind_vcpu bind_vcpu;
		struct evtchn_unmask unmask;
	} u;
};
DEFINE_GUEST_HANDLE_STRUCT(evtchn_op);

#endif /* __XEN_PUBLIC_EVENT_CHANNEL_H__ */
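A sketch of how a frontend might allocate an unbound port for a backend to connect to, using HYPERVISOR_event_channel_op() as the in-kernel notify helper above already does; the function name is invented and error handling is minimal:

/* Hypothetical example: allocate an unbound port for <backend_domid>. */
static int example_alloc_unbound_port(domid_t backend_domid,
				      evtchn_port_t *port)
{
	struct evtchn_alloc_unbound alloc = {
		.dom        = DOMID_SELF,
		.remote_dom = backend_domid,
	};
	int err;

	err = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound, &alloc);
	if (err)
		return err;

	*port = alloc.port;	/* advertise this to the backend, e.g. via xenbus */
	return 0;
}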
@@ -0,0 +1,43 @@
/******************************************************************************
 * features.h
 *
 * Feature flags, reported by XENVER_get_features.
 *
 * Copyright (c) 2006, Keir Fraser <keir@xensource.com>
 */

#ifndef __XEN_PUBLIC_FEATURES_H__
#define __XEN_PUBLIC_FEATURES_H__

/*
 * If set, the guest does not need to write-protect its pagetables, and can
 * update them via direct writes.
 */
#define XENFEAT_writable_page_tables 0

/*
 * If set, the guest does not need to write-protect its segment descriptor
 * tables, and can update them via direct writes.
 */
#define XENFEAT_writable_descriptor_tables 1

/*
 * If set, translation between the guest's 'pseudo-physical' address space
 * and the host's machine address space are handled by the hypervisor. In this
 * mode the guest does not need to perform phys-to/from-machine translations
 * when performing page table operations.
 */
#define XENFEAT_auto_translated_physmap 2

/* If set, the guest is running in supervisor mode (e.g., x86 ring 0). */
#define XENFEAT_supervisor_mode_kernel 3

/*
 * If set, the guest does not need to allocate x86 PAE page directories
 * below 4GB. This flag is usually implied by auto_translated_physmap.
 */
#define XENFEAT_pae_pgdir_above_4gb 4

#define XENFEAT_NR_SUBMAPS 1

#endif /* __XEN_PUBLIC_FEATURES_H__ */
@@ -0,0 +1,375 @@
/******************************************************************************
 * grant_table.h
 *
 * Interface for granting foreign access to page frames, and receiving
 * page-ownership transfers.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 * Copyright (c) 2004, K A Fraser
 */

#ifndef __XEN_PUBLIC_GRANT_TABLE_H__
#define __XEN_PUBLIC_GRANT_TABLE_H__


/***********************************
 * GRANT TABLE REPRESENTATION
 */

/* Some rough guidelines on accessing and updating grant-table entries
 * in a concurrency-safe manner. For more information, Linux contains a
 * reference implementation for guest OSes (arch/xen/kernel/grant_table.c).
 *
 * NB. WMB is a no-op on current-generation x86 processors. However, a
 * compiler barrier will still be required.
 *
 * Introducing a valid entry into the grant table:
 * 1. Write ent->domid.
 * 2. Write ent->frame:
 * GTF_permit_access: Frame to which access is permitted.
 * GTF_accept_transfer: Pseudo-phys frame slot being filled by new
 * frame, or zero if none.
 * 3. Write memory barrier (WMB).
 * 4. Write ent->flags, inc. valid type.
 *
 * Invalidating an unused GTF_permit_access entry:
 * 1. flags = ent->flags.
 * 2. Observe that !(flags & (GTF_reading|GTF_writing)).
 * 3. Check result of SMP-safe CMPXCHG(&ent->flags, flags, 0).
 * NB. No need for WMB as reuse of entry is control-dependent on success of
 * step 3, and all architectures guarantee ordering of ctrl-dep writes.
 *
 * Invalidating an in-use GTF_permit_access entry:
 * This cannot be done directly. Request assistance from the domain controller
 * which can set a timeout on the use of a grant entry and take necessary
 * action. (NB. This is not yet implemented!).
 *
 * Invalidating an unused GTF_accept_transfer entry:
 * 1. flags = ent->flags.
 * 2. Observe that !(flags & GTF_transfer_committed). [*]
 * 3. Check result of SMP-safe CMPXCHG(&ent->flags, flags, 0).
 * NB. No need for WMB as reuse of entry is control-dependent on success of
 * step 3, and all architectures guarantee ordering of ctrl-dep writes.
 * [*] If GTF_transfer_committed is set then the grant entry is 'committed'.
 * The guest must /not/ modify the grant entry until the address of the
 * transferred frame is written. It is safe for the guest to spin waiting
 * for this to occur (detect by observing GTF_transfer_completed in
 * ent->flags).
 *
 * Invalidating a committed GTF_accept_transfer entry:
 * 1. Wait for (ent->flags & GTF_transfer_completed).
 *
 * Changing a GTF_permit_access from writable to read-only:
 * Use SMP-safe CMPXCHG to set GTF_readonly, while checking !GTF_writing.
 *
 * Changing a GTF_permit_access from read-only to writable:
 * Use SMP-safe bit-setting instruction.
 */

/*
 * A grant table comprises a packed array of grant entries in one or more
 * page frames shared between Xen and a guest.
 * [XEN]: This field is written by Xen and read by the sharing guest.
 * [GST]: This field is written by the guest and read by Xen.
 */
struct grant_entry {
	/* GTF_xxx: various type and flag information. [XEN,GST] */
	uint16_t flags;
	/* The domain being granted foreign privileges. [GST] */
	domid_t domid;
	/*
	 * GTF_permit_access: Frame that @domid is allowed to map and access. [GST]
	 * GTF_accept_transfer: Frame whose ownership transferred by @domid. [XEN]
	 */
	uint32_t frame;
};

/*
 * Type of grant entry.
 * GTF_invalid: This grant entry grants no privileges.
 * GTF_permit_access: Allow @domid to map/access @frame.
 * GTF_accept_transfer: Allow @domid to transfer ownership of one page frame
 * to this guest. Xen writes the page number to @frame.
 */
#define GTF_invalid (0U<<0)
#define GTF_permit_access (1U<<0)
#define GTF_accept_transfer (2U<<0)
#define GTF_type_mask (3U<<0)

/*
 * Subflags for GTF_permit_access.
 * GTF_readonly: Restrict @domid to read-only mappings and accesses. [GST]
 * GTF_reading: Grant entry is currently mapped for reading by @domid. [XEN]
 * GTF_writing: Grant entry is currently mapped for writing by @domid. [XEN]
 */
#define _GTF_readonly (2)
#define GTF_readonly (1U<<_GTF_readonly)
#define _GTF_reading (3)
#define GTF_reading (1U<<_GTF_reading)
#define _GTF_writing (4)
#define GTF_writing (1U<<_GTF_writing)

/*
 * Subflags for GTF_accept_transfer:
 * GTF_transfer_committed: Xen sets this flag to indicate that it is committed
 * to transferring ownership of a page frame. When a guest sees this flag
 * it must /not/ modify the grant entry until GTF_transfer_completed is
 * set by Xen.
 * GTF_transfer_completed: It is safe for the guest to spin-wait on this flag
 * after reading GTF_transfer_committed. Xen will always write the frame
 * address, followed by ORing this flag, in a timely manner.
 */
#define _GTF_transfer_committed (2)
#define GTF_transfer_committed (1U<<_GTF_transfer_committed)
#define _GTF_transfer_completed (3)
#define GTF_transfer_completed (1U<<_GTF_transfer_completed)


/***********************************
 * GRANT TABLE QUERIES AND USES
 */

/*
 * Reference to a grant entry in a specified domain's grant table.
 */
typedef uint32_t grant_ref_t;

/*
 * Handle to track a mapping created via a grant reference.
 */
typedef uint32_t grant_handle_t;

/*
 * GNTTABOP_map_grant_ref: Map the grant entry (<dom>,<ref>) for access
 * by devices and/or host CPUs. If successful, <handle> is a tracking number
 * that must be presented later to destroy the mapping(s). On error, <handle>
 * is a negative status code.
 * NOTES:
 * 1. If GNTMAP_device_map is specified then <dev_bus_addr> is the address
 * via which I/O devices may access the granted frame.
 * 2. If GNTMAP_host_map is specified then a mapping will be added at
 * either a host virtual address in the current address space, or at
 * a PTE at the specified machine address. The type of mapping to
 * perform is selected through the GNTMAP_contains_pte flag, and the
 * address is specified in <host_addr>.
 * 3. Mappings should only be destroyed via GNTTABOP_unmap_grant_ref. If a
 * host mapping is destroyed by other means then it is *NOT* guaranteed
 * to be accounted to the correct grant reference!
 */
#define GNTTABOP_map_grant_ref 0
struct gnttab_map_grant_ref {
	/* IN parameters. */
	uint64_t host_addr;
	uint32_t flags;		/* GNTMAP_* */
	grant_ref_t ref;
	domid_t dom;
	/* OUT parameters. */
	int16_t status;		/* GNTST_* */
	grant_handle_t handle;
	uint64_t dev_bus_addr;
};

/*
 * GNTTABOP_unmap_grant_ref: Destroy one or more grant-reference mappings
 * tracked by <handle>. If <host_addr> or <dev_bus_addr> is zero, that
 * field is ignored. If non-zero, they must refer to a device/host mapping
 * that is tracked by <handle>
 * NOTES:
 * 1. The call may fail in an undefined manner if either mapping is not
 * tracked by <handle>.
 * 3. After executing a batch of unmaps, it is guaranteed that no stale
 * mappings will remain in the device or host TLBs.
 */
#define GNTTABOP_unmap_grant_ref 1
struct gnttab_unmap_grant_ref {
	/* IN parameters. */
	uint64_t host_addr;
	uint64_t dev_bus_addr;
	grant_handle_t handle;
	/* OUT parameters. */
	int16_t status;		/* GNTST_* */
};

/*
 * GNTTABOP_setup_table: Set up a grant table for <dom> comprising at least
 * <nr_frames> pages. The frame addresses are written to the <frame_list>.
 * Only <nr_frames> addresses are written, even if the table is larger.
 * NOTES:
 * 1. <dom> may be specified as DOMID_SELF.
 * 2. Only a sufficiently-privileged domain may specify <dom> != DOMID_SELF.
 * 3. Xen may not support more than a single grant-table page per domain.
 */
#define GNTTABOP_setup_table 2
struct gnttab_setup_table {
	/* IN parameters. */
	domid_t dom;
	uint32_t nr_frames;
	/* OUT parameters. */
	int16_t status;		/* GNTST_* */
	ulong *frame_list;
};

/*
 * GNTTABOP_dump_table: Dump the contents of the grant table to the
 * xen console. Debugging use only.
 */
#define GNTTABOP_dump_table 3
struct gnttab_dump_table {
	/* IN parameters. */
	domid_t dom;
	/* OUT parameters. */
	int16_t status;		/* GNTST_* */
};

/*
 * GNTTABOP_transfer_grant_ref: Transfer <frame> to a foreign domain. The
 * foreign domain has previously registered its interest in the transfer via
 * <domid, ref>.
 *
 * Note that, even if the transfer fails, the specified page no longer belongs
 * to the calling domain *unless* the error is GNTST_bad_page.
 */
#define GNTTABOP_transfer 4
struct gnttab_transfer {
	/* IN parameters. */
	unsigned long mfn;
	domid_t domid;
	grant_ref_t ref;
	/* OUT parameters. */
	int16_t status;
};


/*
 * GNTTABOP_copy: Hypervisor based copy
 * source and destinations can be eithers MFNs or, for foreign domains,
 * grant references. the foreign domain has to grant read/write access
 * in its grant table.
 *
 * The flags specify what type source and destinations are (either MFN
 * or grant reference).
 *
 * Note that this can also be used to copy data between two domains
 * via a third party if the source and destination domains had previously
 * grant appropriate access to their pages to the third party.
 *
 * source_offset specifies an offset in the source frame, dest_offset
 * the offset in the target frame and len specifies the number of
 * bytes to be copied.
 */

#define _GNTCOPY_source_gref (0)
#define GNTCOPY_source_gref (1<<_GNTCOPY_source_gref)
#define _GNTCOPY_dest_gref (1)
#define GNTCOPY_dest_gref (1<<_GNTCOPY_dest_gref)

#define GNTTABOP_copy 5
struct gnttab_copy {
	/* IN parameters. */
	struct {
		union {
			grant_ref_t ref;
			unsigned long gmfn;
		} u;
		domid_t domid;
		uint16_t offset;
	} source, dest;
	uint16_t len;
	uint16_t flags;		/* GNTCOPY_* */
	/* OUT parameters. */
	int16_t status;
};

/*
 * GNTTABOP_query_size: Query the current and maximum sizes of the shared
 * grant table.
 * NOTES:
 * 1. <dom> may be specified as DOMID_SELF.
 * 2. Only a sufficiently-privileged domain may specify <dom> != DOMID_SELF.
 */
#define GNTTABOP_query_size 6
struct gnttab_query_size {
	/* IN parameters. */
	domid_t dom;
	/* OUT parameters. */
	uint32_t nr_frames;
	uint32_t max_nr_frames;
	int16_t status;		/* GNTST_* */
};


/*
 * Bitfield values for update_pin_status.flags.
 */
/* Map the grant entry for access by I/O devices. */
#define _GNTMAP_device_map (0)
#define GNTMAP_device_map (1<<_GNTMAP_device_map)
/* Map the grant entry for access by host CPUs. */
#define _GNTMAP_host_map (1)
#define GNTMAP_host_map (1<<_GNTMAP_host_map)
/* Accesses to the granted frame will be restricted to read-only access. */
#define _GNTMAP_readonly (2)
#define GNTMAP_readonly (1<<_GNTMAP_readonly)
/*
 * GNTMAP_host_map subflag:
 * 0 => The host mapping is usable only by the guest OS.
 * 1 => The host mapping is usable by guest OS + current application.
 */
#define _GNTMAP_application_map (3)
#define GNTMAP_application_map (1<<_GNTMAP_application_map)

/*
 * GNTMAP_contains_pte subflag:
 * 0 => This map request contains a host virtual address.
 * 1 => This map request contains the machine addess of the PTE to update.
 */
#define _GNTMAP_contains_pte (4)
#define GNTMAP_contains_pte (1<<_GNTMAP_contains_pte)

/*
 * Values for error status returns. All errors are -ve.
 */
#define GNTST_okay (0)			/* Normal return. */
#define GNTST_general_error (-1)	/* General undefined error. */
#define GNTST_bad_domain (-2)		/* Unrecognsed domain id. */
#define GNTST_bad_gntref (-3)		/* Unrecognised or inappropriate gntref. */
#define GNTST_bad_handle (-4)		/* Unrecognised or inappropriate handle. */
#define GNTST_bad_virt_addr (-5)	/* Inappropriate virtual address to map. */
#define GNTST_bad_dev_addr (-6)		/* Inappropriate device address to unmap.*/
#define GNTST_no_device_space (-7)	/* Out of space in I/O MMU. */
#define GNTST_permission_denied (-8)	/* Not enough privilege for operation. */
#define GNTST_bad_page (-9)		/* Specified page was invalid for op. */
#define GNTST_bad_copy_arg (-10)	/* copy arguments cross page boundary */

#define GNTTABOP_error_msgs { \
	"okay", \
	"undefined error", \
	"unrecognised domain id", \
	"invalid grant reference", \
	"invalid mapping handle", \
	"invalid virtual address", \
	"invalid device address", \
	"no spare translation slot in the I/O MMU", \
	"permission denied", \
	"bad page", \
	"copy arguments cross page boundary" \
}

#endif /* __XEN_PUBLIC_GRANT_TABLE_H__ */
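The ordering rules quoted above can be hard to follow in the abstract, so here is a minimal C sketch of steps 1-4 of "Introducing a valid entry"; the gnttab_shared mapping and the function name are assumptions of the example, not part of this header:

/* Hypothetical example: publish a read-only grant of <frame> to <domid>.
 * "gnttab_shared" is assumed to be the guest's mapping of its grant table. */
extern struct grant_entry *gnttab_shared;

static void example_publish_grant(grant_ref_t ref, domid_t domid,
				  uint32_t frame)
{
	gnttab_shared[ref].domid = domid;		/* step 1 */
	gnttab_shared[ref].frame = frame;		/* step 2 */
	wmb();						/* step 3: order before flags */
	gnttab_shared[ref].flags = GTF_permit_access | GTF_readonly;	/* step 4 */
}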
@@ -0,0 +1,94 @@
/******************************************************************************
 * blkif.h
 *
 * Unified block-device I/O interface for Xen guest OSes.
 *
 * Copyright (c) 2003-2004, Keir Fraser
 */

#ifndef __XEN_PUBLIC_IO_BLKIF_H__
#define __XEN_PUBLIC_IO_BLKIF_H__

#include "ring.h"
#include "../grant_table.h"

/*
 * Front->back notifications: When enqueuing a new request, sending a
 * notification can be made conditional on req_event (i.e., the generic
 * hold-off mechanism provided by the ring macros). Backends must set
 * req_event appropriately (e.g., using RING_FINAL_CHECK_FOR_REQUESTS()).
 *
 * Back->front notifications: When enqueuing a new response, sending a
 * notification can be made conditional on rsp_event (i.e., the generic
 * hold-off mechanism provided by the ring macros). Frontends must set
 * rsp_event appropriately (e.g., using RING_FINAL_CHECK_FOR_RESPONSES()).
 */

typedef uint16_t blkif_vdev_t;
typedef uint64_t blkif_sector_t;

/*
 * REQUEST CODES.
 */
#define BLKIF_OP_READ 0
#define BLKIF_OP_WRITE 1
/*
 * Recognised only if "feature-barrier" is present in backend xenbus info.
 * The "feature_barrier" node contains a boolean indicating whether barrier
 * requests are likely to succeed or fail. Either way, a barrier request
 * may fail at any time with BLKIF_RSP_EOPNOTSUPP if it is unsupported by
 * the underlying block-device hardware. The boolean simply indicates whether
 * or not it is worthwhile for the frontend to attempt barrier requests.
 * If a backend does not recognise BLKIF_OP_WRITE_BARRIER, it should *not*
 * create the "feature-barrier" node!
 */
#define BLKIF_OP_WRITE_BARRIER 2

/*
 * Maximum scatter/gather segments per request.
 * This is carefully chosen so that sizeof(struct blkif_ring) <= PAGE_SIZE.
 * NB. This could be 12 if the ring indexes weren't stored in the same page.
 */
#define BLKIF_MAX_SEGMENTS_PER_REQUEST 11

struct blkif_request {
	uint8_t operation;		/* BLKIF_OP_??? */
	uint8_t nr_segments;		/* number of segments */
	blkif_vdev_t handle;		/* only for read/write requests */
	uint64_t id;			/* private guest value, echoed in resp */
	blkif_sector_t sector_number;	/* start sector idx on disk (r/w only) */
	struct blkif_request_segment {
		grant_ref_t gref;	/* reference to I/O buffer frame */
		/* @first_sect: first sector in frame to transfer (inclusive). */
		/* @last_sect: last sector in frame to transfer (inclusive). */
		uint8_t first_sect, last_sect;
	} seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
};

struct blkif_response {
	uint64_t id;		/* copied from request */
	uint8_t operation;	/* copied from request */
	int16_t status;		/* BLKIF_RSP_??? */
};

/*
 * STATUS RETURN CODES.
 */
/* Operation not supported (only happens on barrier writes). */
#define BLKIF_RSP_EOPNOTSUPP -2
/* Operation failed for some unspecified reason (-EIO). */
#define BLKIF_RSP_ERROR -1
/* Operation completed successfully. */
#define BLKIF_RSP_OKAY 0

/*
 * Generate blkif ring structures and types.
 */

DEFINE_RING_TYPES(blkif, struct blkif_request, struct blkif_response);

#define VDISK_CDROM 0x1
#define VDISK_REMOVABLE 0x2
#define VDISK_READONLY 0x4

#endif /* __XEN_PUBLIC_IO_BLKIF_H__ */
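A sketch of filling in one single-segment read request as a frontend might do it; the grant reference, id and sector values are placeholders:

/* Hypothetical example: describe a one-segment read of 8 sectors. */
static void example_fill_read(struct blkif_request *req, grant_ref_t gref,
			      blkif_vdev_t handle, blkif_sector_t sector)
{
	req->operation         = BLKIF_OP_READ;
	req->nr_segments       = 1;
	req->handle            = handle;
	req->id                = 0x1234;	/* echoed back in the response */
	req->sector_number     = sector;
	req->seg[0].gref       = gref;
	req->seg[0].first_sect = 0;
	req->seg[0].last_sect  = 7;		/* 8 sectors = one 4KB page */
}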
@@ -0,0 +1,23 @@
/******************************************************************************
 * console.h
 *
 * Console I/O interface for Xen guest OSes.
 *
 * Copyright (c) 2005, Keir Fraser
 */

#ifndef __XEN_PUBLIC_IO_CONSOLE_H__
#define __XEN_PUBLIC_IO_CONSOLE_H__

typedef uint32_t XENCONS_RING_IDX;

#define MASK_XENCONS_IDX(idx, ring) ((idx) & (sizeof(ring)-1))

struct xencons_interface {
	char in[1024];
	char out[2048];
	XENCONS_RING_IDX in_cons, in_prod;
	XENCONS_RING_IDX out_cons, out_prod;
};

#endif /* __XEN_PUBLIC_IO_CONSOLE_H__ */
@@ -0,0 +1,158 @@
/******************************************************************************
 * netif.h
 *
 * Unified network-device I/O interface for Xen guest OSes.
 *
 * Copyright (c) 2003-2004, Keir Fraser
 */

#ifndef __XEN_PUBLIC_IO_NETIF_H__
#define __XEN_PUBLIC_IO_NETIF_H__

#include "ring.h"
#include "../grant_table.h"

/*
 * Notifications after enqueuing any type of message should be conditional on
 * the appropriate req_event or rsp_event field in the shared ring.
 * If the client sends notification for rx requests then it should specify
 * feature 'feature-rx-notify' via xenbus. Otherwise the backend will assume
 * that it cannot safely queue packets (as it may not be kicked to send them).
 */

/*
 * This is the 'wire' format for packets:
 * Request 1: netif_tx_request -- NETTXF_* (any flags)
 * [Request 2: netif_tx_extra] (only if request 1 has NETTXF_extra_info)
 * [Request 3: netif_tx_extra] (only if request 2 has XEN_NETIF_EXTRA_MORE)
 * Request 4: netif_tx_request -- NETTXF_more_data
 * Request 5: netif_tx_request -- NETTXF_more_data
 * ...
 * Request N: netif_tx_request -- 0
 */

/* Protocol checksum field is blank in the packet (hardware offload)? */
#define _NETTXF_csum_blank (0)
#define NETTXF_csum_blank (1U<<_NETTXF_csum_blank)

/* Packet data has been validated against protocol checksum. */
#define _NETTXF_data_validated (1)
#define NETTXF_data_validated (1U<<_NETTXF_data_validated)

/* Packet continues in the next request descriptor. */
#define _NETTXF_more_data (2)
#define NETTXF_more_data (1U<<_NETTXF_more_data)

/* Packet to be followed by extra descriptor(s). */
#define _NETTXF_extra_info (3)
#define NETTXF_extra_info (1U<<_NETTXF_extra_info)

struct xen_netif_tx_request {
	grant_ref_t gref;	/* Reference to buffer page */
	uint16_t offset;	/* Offset within buffer page */
	uint16_t flags;		/* NETTXF_* */
	uint16_t id;		/* Echoed in response message. */
	uint16_t size;		/* Packet size in bytes. */
};

/* Types of netif_extra_info descriptors. */
#define XEN_NETIF_EXTRA_TYPE_NONE (0)	/* Never used - invalid */
#define XEN_NETIF_EXTRA_TYPE_GSO (1)	/* u.gso */
#define XEN_NETIF_EXTRA_TYPE_MAX (2)

/* netif_extra_info flags. */
#define _XEN_NETIF_EXTRA_FLAG_MORE (0)
#define XEN_NETIF_EXTRA_FLAG_MORE (1U<<_XEN_NETIF_EXTRA_FLAG_MORE)

/* GSO types - only TCPv4 currently supported. */
#define XEN_NETIF_GSO_TYPE_TCPV4 (1)

/*
 * This structure needs to fit within both netif_tx_request and
 * netif_rx_response for compatibility.
 */
struct xen_netif_extra_info {
	uint8_t type;	/* XEN_NETIF_EXTRA_TYPE_* */
	uint8_t flags;	/* XEN_NETIF_EXTRA_FLAG_* */

	union {
		struct {
			/*
			 * Maximum payload size of each segment. For
			 * example, for TCP this is just the path MSS.
			 */
			uint16_t size;

			/*
			 * GSO type. This determines the protocol of
			 * the packet and any extra features required
			 * to segment the packet properly.
			 */
			uint8_t type;	/* XEN_NETIF_GSO_TYPE_* */

			/* Future expansion. */
			uint8_t pad;

			/*
			 * GSO features. This specifies any extra GSO
			 * features required to process this packet,
			 * such as ECN support for TCPv4.
			 */
			uint16_t features;	/* XEN_NETIF_GSO_FEAT_* */
		} gso;

		uint16_t pad[3];
	} u;
};

struct xen_netif_tx_response {
	uint16_t id;
	int16_t status;	/* NETIF_RSP_* */
};

struct xen_netif_rx_request {
	uint16_t id;		/* Echoed in response message. */
	grant_ref_t gref;	/* Reference to incoming granted frame */
};

/* Packet data has been validated against protocol checksum. */
#define _NETRXF_data_validated (0)
#define NETRXF_data_validated (1U<<_NETRXF_data_validated)

/* Protocol checksum field is blank in the packet (hardware offload)? */
#define _NETRXF_csum_blank (1)
#define NETRXF_csum_blank (1U<<_NETRXF_csum_blank)

/* Packet continues in the next request descriptor. */
#define _NETRXF_more_data (2)
#define NETRXF_more_data (1U<<_NETRXF_more_data)

/* Packet to be followed by extra descriptor(s). */
#define _NETRXF_extra_info (3)
#define NETRXF_extra_info (1U<<_NETRXF_extra_info)

struct xen_netif_rx_response {
	uint16_t id;
	uint16_t offset;	/* Offset in page of start of received packet */
	uint16_t flags;		/* NETRXF_* */
	int16_t status;		/* -ve: BLKIF_RSP_* ; +ve: Rx'ed pkt size. */
};

/*
 * Generate netif ring structures and types.
 */

DEFINE_RING_TYPES(xen_netif_tx,
		  struct xen_netif_tx_request,
		  struct xen_netif_tx_response);
DEFINE_RING_TYPES(xen_netif_rx,
		  struct xen_netif_rx_request,
		  struct xen_netif_rx_response);

#define NETIF_RSP_DROPPED -2
#define NETIF_RSP_ERROR -1
#define NETIF_RSP_OKAY 0
/* No response: used for auxiliary requests (e.g., netif_tx_extra). */
#define NETIF_RSP_NULL 1

#endif
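For illustration (the grant reference, id and sizes are invented), the first descriptor of a single-slot transmit as the wire-format comment above describes it:

/* Hypothetical example: a one-request TX packet with checksum offload. */
static void example_fill_tx(struct xen_netif_tx_request *tx, grant_ref_t gref,
			    uint16_t offset, uint16_t len, uint16_t id)
{
	tx->gref   = gref;		/* page holding the packet data */
	tx->offset = offset;		/* start of the frame in that page */
	tx->size   = len;		/* total packet size in bytes */
	tx->id     = id;		/* echoed back in xen_netif_tx_response */
	tx->flags  = NETTXF_csum_blank;	/* no NETTXF_more_data: single slot */
}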
@@ -0,0 +1,260 @@
/******************************************************************************
 * ring.h
 *
 * Shared producer-consumer ring macros.
 *
 * Tim Deegan and Andrew Warfield November 2004.
 */

#ifndef __XEN_PUBLIC_IO_RING_H__
#define __XEN_PUBLIC_IO_RING_H__

typedef unsigned int RING_IDX;

/* Round a 32-bit unsigned constant down to the nearest power of two. */
#define __RD2(_x) (((_x) & 0x00000002) ? 0x2 : ((_x) & 0x1))
#define __RD4(_x) (((_x) & 0x0000000c) ? __RD2((_x)>>2)<<2 : __RD2(_x))
#define __RD8(_x) (((_x) & 0x000000f0) ? __RD4((_x)>>4)<<4 : __RD4(_x))
#define __RD16(_x) (((_x) & 0x0000ff00) ? __RD8((_x)>>8)<<8 : __RD8(_x))
#define __RD32(_x) (((_x) & 0xffff0000) ? __RD16((_x)>>16)<<16 : __RD16(_x))

/*
 * Calculate size of a shared ring, given the total available space for the
 * ring and indexes (_sz), and the name tag of the request/response structure.
 * A ring contains as many entries as will fit, rounded down to the nearest
 * power of two (so we can mask with (size-1) to loop around).
 */
#define __RING_SIZE(_s, _sz) \
	(__RD32(((_sz) - (long)&(_s)->ring + (long)(_s)) / sizeof((_s)->ring[0])))

/*
 * Macros to make the correct C datatypes for a new kind of ring.
 *
 * To make a new ring datatype, you need to have two message structures,
 * let's say struct request, and struct response already defined.
 *
 * In a header where you want the ring datatype declared, you then do:
 *
 * DEFINE_RING_TYPES(mytag, struct request, struct response);
 *
 * These expand out to give you a set of types, as you can see below.
 * The most important of these are:
 *
 * struct mytag_sring - The shared ring.
 * struct mytag_front_ring - The 'front' half of the ring.
 * struct mytag_back_ring - The 'back' half of the ring.
 *
 * To initialize a ring in your code you need to know the location and size
 * of the shared memory area (PAGE_SIZE, for instance). To initialise
 * the front half:
 *
 * struct mytag_front_ring front_ring;
 * SHARED_RING_INIT((struct mytag_sring *)shared_page);
 * FRONT_RING_INIT(&front_ring, (struct mytag_sring *)shared_page,
 * PAGE_SIZE);
 *
 * Initializing the back follows similarly (note that only the front
 * initializes the shared ring):
 *
 * struct mytag_back_ring back_ring;
 * BACK_RING_INIT(&back_ring, (struct mytag_sring *)shared_page,
 * PAGE_SIZE);
 */

#define DEFINE_RING_TYPES(__name, __req_t, __rsp_t) \
|
||||
\
|
||||
/* Shared ring entry */ \
|
||||
union __name##_sring_entry { \
|
||||
__req_t req; \
|
||||
__rsp_t rsp; \
|
||||
}; \
|
||||
\
|
||||
/* Shared ring page */ \
|
||||
struct __name##_sring { \
|
||||
RING_IDX req_prod, req_event; \
|
||||
RING_IDX rsp_prod, rsp_event; \
|
||||
uint8_t pad[48]; \
|
||||
union __name##_sring_entry ring[1]; /* variable-length */ \
|
||||
}; \
|
||||
\
|
||||
/* "Front" end's private variables */ \
|
||||
struct __name##_front_ring { \
|
||||
RING_IDX req_prod_pvt; \
|
||||
RING_IDX rsp_cons; \
|
||||
unsigned int nr_ents; \
|
||||
struct __name##_sring *sring; \
|
||||
}; \
|
||||
\
|
||||
/* "Back" end's private variables */ \
|
||||
struct __name##_back_ring { \
|
||||
RING_IDX rsp_prod_pvt; \
|
||||
RING_IDX req_cons; \
|
||||
unsigned int nr_ents; \
|
||||
struct __name##_sring *sring; \
|
||||
};
|
||||
|
||||
/*
|
||||
* Macros for manipulating rings.
|
||||
*
|
||||
* FRONT_RING_whatever works on the "front end" of a ring: here
|
||||
* requests are pushed on to the ring and responses taken off it.
|
||||
*
|
||||
* BACK_RING_whatever works on the "back end" of a ring: here
|
||||
* requests are taken off the ring and responses put on.
|
||||
*
|
||||
* N.B. these macros do NO INTERLOCKS OR FLOW CONTROL.
|
||||
* This is OK in 1-for-1 request-response situations where the
|
||||
* requestor (front end) never has more than RING_SIZE()-1
|
||||
* outstanding requests.
|
||||
*/
|
||||
|
||||
/* Initialising empty rings */
|
||||
#define SHARED_RING_INIT(_s) do { \
|
||||
(_s)->req_prod = (_s)->rsp_prod = 0; \
|
||||
(_s)->req_event = (_s)->rsp_event = 1; \
|
||||
memset((_s)->pad, 0, sizeof((_s)->pad)); \
|
||||
} while(0)
|
||||
|
||||
#define FRONT_RING_INIT(_r, _s, __size) do { \
|
||||
(_r)->req_prod_pvt = 0; \
|
||||
(_r)->rsp_cons = 0; \
|
||||
(_r)->nr_ents = __RING_SIZE(_s, __size); \
|
||||
(_r)->sring = (_s); \
|
||||
} while (0)
|
||||
|
||||
#define BACK_RING_INIT(_r, _s, __size) do { \
|
||||
(_r)->rsp_prod_pvt = 0; \
|
||||
(_r)->req_cons = 0; \
|
||||
(_r)->nr_ents = __RING_SIZE(_s, __size); \
|
||||
(_r)->sring = (_s); \
|
||||
} while (0)
|
||||
|
||||
/* Initialize to existing shared indexes -- for recovery */
|
||||
#define FRONT_RING_ATTACH(_r, _s, __size) do { \
|
||||
(_r)->sring = (_s); \
|
||||
(_r)->req_prod_pvt = (_s)->req_prod; \
|
||||
(_r)->rsp_cons = (_s)->rsp_prod; \
|
||||
(_r)->nr_ents = __RING_SIZE(_s, __size); \
|
||||
} while (0)
|
||||
|
||||
#define BACK_RING_ATTACH(_r, _s, __size) do { \
|
||||
(_r)->sring = (_s); \
|
||||
(_r)->rsp_prod_pvt = (_s)->rsp_prod; \
|
||||
(_r)->req_cons = (_s)->req_prod; \
|
||||
(_r)->nr_ents = __RING_SIZE(_s, __size); \
|
||||
} while (0)
|
||||
|
||||
/* How big is this ring? */
|
||||
#define RING_SIZE(_r) \
|
||||
((_r)->nr_ents)
|
||||
|
||||
/* Number of free requests (for use on front side only). */
|
||||
#define RING_FREE_REQUESTS(_r) \
|
||||
(RING_SIZE(_r) - ((_r)->req_prod_pvt - (_r)->rsp_cons))
|
||||
|
||||
/* Test if there is an empty slot available on the front ring.
|
||||
* (This is only meaningful from the front. )
|
||||
*/
|
||||
#define RING_FULL(_r) \
|
||||
(RING_FREE_REQUESTS(_r) == 0)
|
||||
|
||||
/* Test if there are outstanding messages to be processed on a ring. */
|
||||
#define RING_HAS_UNCONSUMED_RESPONSES(_r) \
|
||||
((_r)->sring->rsp_prod - (_r)->rsp_cons)
|
||||
|
||||
#define RING_HAS_UNCONSUMED_REQUESTS(_r) \
|
||||
({ \
|
||||
unsigned int req = (_r)->sring->req_prod - (_r)->req_cons; \
|
||||
unsigned int rsp = RING_SIZE(_r) - \
|
||||
((_r)->req_cons - (_r)->rsp_prod_pvt); \
|
||||
req < rsp ? req : rsp; \
|
||||
})
|
||||
|
||||
/* Direct access to individual ring elements, by index. */
|
||||
#define RING_GET_REQUEST(_r, _idx) \
|
||||
(&((_r)->sring->ring[((_idx) & (RING_SIZE(_r) - 1))].req))
|
||||
|
||||
#define RING_GET_RESPONSE(_r, _idx) \
|
||||
(&((_r)->sring->ring[((_idx) & (RING_SIZE(_r) - 1))].rsp))
|
||||
|
||||
/* Loop termination condition: Would the specified index overflow the ring? */
|
||||
#define RING_REQUEST_CONS_OVERFLOW(_r, _cons) \
|
||||
(((_cons) - (_r)->rsp_prod_pvt) >= RING_SIZE(_r))
|
||||
|
||||
#define RING_PUSH_REQUESTS(_r) do { \
|
||||
wmb(); /* back sees requests /before/ updated producer index */ \
|
||||
(_r)->sring->req_prod = (_r)->req_prod_pvt; \
|
||||
} while (0)
|
||||
|
||||
#define RING_PUSH_RESPONSES(_r) do { \
|
||||
wmb(); /* front sees responses /before/ updated producer index */ \
|
||||
(_r)->sring->rsp_prod = (_r)->rsp_prod_pvt; \
|
||||
} while (0)
|
||||
|
||||
/*
|
||||
* Notification hold-off (req_event and rsp_event):
|
||||
*
|
||||
* When queueing requests or responses on a shared ring, it may not always be
|
||||
* necessary to notify the remote end. For example, if requests are in flight
|
||||
* in a backend, the front may be able to queue further requests without
|
||||
* notifying the back (if the back checks for new requests when it queues
|
||||
* responses).
|
||||
*
|
||||
* When enqueuing requests or responses:
|
||||
*
|
||||
* Use RING_PUSH_{REQUESTS,RESPONSES}_AND_CHECK_NOTIFY(). The second argument
|
||||
* is a boolean return value. True indicates that the receiver requires an
|
||||
* asynchronous notification.
|
||||
*
|
||||
* After dequeuing requests or responses (before sleeping the connection):
|
||||
*
|
||||
* Use RING_FINAL_CHECK_FOR_REQUESTS() or RING_FINAL_CHECK_FOR_RESPONSES().
|
||||
* The second argument is a boolean return value. True indicates that there
|
||||
* are pending messages on the ring (i.e., the connection should not be put
|
||||
* to sleep).
|
||||
*
|
||||
* These macros will set the req_event/rsp_event field to trigger a
|
||||
* notification on the very next message that is enqueued. If you want to
|
||||
* create batches of work (i.e., only receive a notification after several
|
||||
* messages have been enqueued) then you will need to create a customised
|
||||
* version of the FINAL_CHECK macro in your own code, which sets the event
|
||||
* field appropriately.
|
||||
*/
|
||||
|
||||
#define RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(_r, _notify) do { \
|
||||
RING_IDX __old = (_r)->sring->req_prod; \
|
||||
RING_IDX __new = (_r)->req_prod_pvt; \
|
||||
wmb(); /* back sees requests /before/ updated producer index */ \
|
||||
(_r)->sring->req_prod = __new; \
|
||||
mb(); /* back sees new requests /before/ we check req_event */ \
|
||||
(_notify) = ((RING_IDX)(__new - (_r)->sring->req_event) < \
|
||||
(RING_IDX)(__new - __old)); \
|
||||
} while (0)
|
||||
|
||||
#define RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(_r, _notify) do { \
|
||||
RING_IDX __old = (_r)->sring->rsp_prod; \
|
||||
RING_IDX __new = (_r)->rsp_prod_pvt; \
|
||||
wmb(); /* front sees responses /before/ updated producer index */ \
|
||||
(_r)->sring->rsp_prod = __new; \
|
||||
mb(); /* front sees new responses /before/ we check rsp_event */ \
|
||||
(_notify) = ((RING_IDX)(__new - (_r)->sring->rsp_event) < \
|
||||
(RING_IDX)(__new - __old)); \
|
||||
} while (0)
|
||||
|
||||
#define RING_FINAL_CHECK_FOR_REQUESTS(_r, _work_to_do) do { \
|
||||
(_work_to_do) = RING_HAS_UNCONSUMED_REQUESTS(_r); \
|
||||
if (_work_to_do) break; \
|
||||
(_r)->sring->req_event = (_r)->req_cons + 1; \
|
||||
mb(); \
|
||||
(_work_to_do) = RING_HAS_UNCONSUMED_REQUESTS(_r); \
|
||||
} while (0)
|
||||
|
||||
#define RING_FINAL_CHECK_FOR_RESPONSES(_r, _work_to_do) do { \
|
||||
(_work_to_do) = RING_HAS_UNCONSUMED_RESPONSES(_r); \
|
||||
if (_work_to_do) break; \
|
||||
(_r)->sring->rsp_event = (_r)->rsp_cons + 1; \
|
||||
mb(); \
|
||||
(_work_to_do) = RING_HAS_UNCONSUMED_RESPONSES(_r); \
|
||||
} while (0)
|
||||
|
||||
#endif /* __XEN_PUBLIC_IO_RING_H__ */
|
|
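As a usage sketch only: a front end queueing one request with the macros above. It assumes DEFINE_RING_TYPES(mytag, struct request, struct response) has been instantiated exactly as in the comment in ring.h, and notify_backend() stands in for whatever event-channel kick the driver actually uses.

static void example_queue_request(void *shared_page)
{
	struct mytag_front_ring front;
	struct request *req;
	int notify;

	/* Only the front end initialises the shared page. */
	SHARED_RING_INIT((struct mytag_sring *)shared_page);
	FRONT_RING_INIT(&front, (struct mytag_sring *)shared_page, PAGE_SIZE);

	if (RING_FULL(&front))
		return;

	req = RING_GET_REQUEST(&front, front.req_prod_pvt);
	/* ... fill in *req ... */
	front.req_prod_pvt++;

	RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&front, notify);
	if (notify)
		notify_backend();	/* placeholder for an event-channel notification */
}
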
@@ -0,0 +1,44 @@
/*****************************************************************************
 * xenbus.h
 *
 * Xenbus protocol details.
 *
 * Copyright (C) 2005 XenSource Ltd.
 */

#ifndef _XEN_PUBLIC_IO_XENBUS_H
#define _XEN_PUBLIC_IO_XENBUS_H

/* The state of either end of the Xenbus, i.e. the current communication
   status of initialisation across the bus. States here imply nothing about
   the state of the connection between the driver and the kernel's device
   layers. */
enum xenbus_state
{
	XenbusStateUnknown      = 0,
	XenbusStateInitialising = 1,
	XenbusStateInitWait     = 2, /* Finished early
					initialisation, but waiting
					for information from the peer
					or hotplug scripts. */
	XenbusStateInitialised  = 3, /* Initialised and waiting for a
					connection from the peer. */
	XenbusStateConnected    = 4,
	XenbusStateClosing      = 5, /* The device is being closed
					due to an error or an unplug
					event. */
	XenbusStateClosed       = 6

};

#endif /* _XEN_PUBLIC_IO_XENBUS_H */

/*
 * Local variables:
 * c-file-style: "linux"
 * indent-tabs-mode: t
 * c-indent-level: 8
 * c-basic-offset: 8
 * tab-width: 8
 * End:
 */

@@ -0,0 +1,87 @@
/*
 * Details of the "wire" protocol between Xen Store Daemon and client
 * library or guest kernel.
 * Copyright (C) 2005 Rusty Russell IBM Corporation
 */

#ifndef _XS_WIRE_H
#define _XS_WIRE_H

enum xsd_sockmsg_type
{
	XS_DEBUG,
	XS_DIRECTORY,
	XS_READ,
	XS_GET_PERMS,
	XS_WATCH,
	XS_UNWATCH,
	XS_TRANSACTION_START,
	XS_TRANSACTION_END,
	XS_INTRODUCE,
	XS_RELEASE,
	XS_GET_DOMAIN_PATH,
	XS_WRITE,
	XS_MKDIR,
	XS_RM,
	XS_SET_PERMS,
	XS_WATCH_EVENT,
	XS_ERROR,
	XS_IS_DOMAIN_INTRODUCED
};

#define XS_WRITE_NONE "NONE"
#define XS_WRITE_CREATE "CREATE"
#define XS_WRITE_CREATE_EXCL "CREATE|EXCL"

/* We hand errors as strings, for portability. */
struct xsd_errors
{
	int errnum;
	const char *errstring;
};
#define XSD_ERROR(x) { x, #x }
static struct xsd_errors xsd_errors[] __attribute__((unused)) = {
	XSD_ERROR(EINVAL),
	XSD_ERROR(EACCES),
	XSD_ERROR(EEXIST),
	XSD_ERROR(EISDIR),
	XSD_ERROR(ENOENT),
	XSD_ERROR(ENOMEM),
	XSD_ERROR(ENOSPC),
	XSD_ERROR(EIO),
	XSD_ERROR(ENOTEMPTY),
	XSD_ERROR(ENOSYS),
	XSD_ERROR(EROFS),
	XSD_ERROR(EBUSY),
	XSD_ERROR(EAGAIN),
	XSD_ERROR(EISCONN)
};

struct xsd_sockmsg
{
	uint32_t type;   /* XS_??? */
	uint32_t req_id; /* Request identifier, echoed in daemon's response. */
	uint32_t tx_id;  /* Transaction id (0 if not related to a transaction). */
	uint32_t len;    /* Length of data following this. */

	/* Generally followed by nul-terminated string(s). */
};

enum xs_watch_type
{
	XS_WATCH_PATH = 0,
	XS_WATCH_TOKEN
};

/* Inter-domain shared memory communications. */
#define XENSTORE_RING_SIZE 1024
typedef uint32_t XENSTORE_RING_IDX;
#define MASK_XENSTORE_IDX(idx) ((idx) & (XENSTORE_RING_SIZE-1))
struct xenstore_domain_interface {
	char req[XENSTORE_RING_SIZE]; /* Requests to xenstore daemon. */
	char rsp[XENSTORE_RING_SIZE]; /* Replies and async watch events. */
	XENSTORE_RING_IDX req_cons, req_prod;
	XENSTORE_RING_IDX rsp_cons, rsp_prod;
};

#endif /* _XS_WIRE_H */

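A sketch of how a client frames an XS_READ request on the wire: an xsd_sockmsg header followed by the nul-terminated path, copied into the req[] ring of struct xenstore_domain_interface. ring_write() is a hypothetical helper (the real index handling and event signalling are omitted), and strlen() comes from <string.h>.

static void example_xs_read(const char *path)
{
	struct xsd_sockmsg msg = {
		.type   = XS_READ,
		.req_id = 1,			/* echoed back in the reply */
		.tx_id  = 0,			/* not part of a transaction */
		.len    = strlen(path) + 1,	/* nul-terminated payload */
	};

	/* ring_write(&msg, sizeof(msg));    copy the header into intf->req[] */
	/* ring_write(path, msg.len);        copy the payload, then notify    */
}
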
@@ -0,0 +1,145 @@
/******************************************************************************
 * memory.h
 *
 * Memory reservation and information.
 *
 * Copyright (c) 2005, Keir Fraser <keir@xensource.com>
 */

#ifndef __XEN_PUBLIC_MEMORY_H__
#define __XEN_PUBLIC_MEMORY_H__

/*
 * Increase or decrease the specified domain's memory reservation. Returns a
 * -ve errcode on failure, or the # extents successfully allocated or freed.
 * arg == addr of struct xen_memory_reservation.
 */
#define XENMEM_increase_reservation 0
#define XENMEM_decrease_reservation 1
#define XENMEM_populate_physmap     6
struct xen_memory_reservation {

	/*
	 * XENMEM_increase_reservation:
	 *   OUT: MFN (*not* GMFN) bases of extents that were allocated
	 * XENMEM_decrease_reservation:
	 *   IN:  GMFN bases of extents to free
	 * XENMEM_populate_physmap:
	 *   IN:  GPFN bases of extents to populate with memory
	 *   OUT: GMFN bases of extents that were allocated
	 *   (NB. This command also updates the mach_to_phys translation table)
	 */
	GUEST_HANDLE(ulong) extent_start;

	/* Number of extents, and size/alignment of each (2^extent_order pages). */
	unsigned long nr_extents;
	unsigned int  extent_order;

	/*
	 * Maximum # bits addressable by the user of the allocated region (e.g.,
	 * I/O devices often have a 32-bit limitation even in 64-bit systems). If
	 * zero then the user has no addressing restriction.
	 * This field is not used by XENMEM_decrease_reservation.
	 */
	unsigned int address_bits;

	/*
	 * Domain whose reservation is being changed.
	 * Unprivileged domains can specify only DOMID_SELF.
	 */
	domid_t domid;

};
DEFINE_GUEST_HANDLE_STRUCT(xen_memory_reservation);

/*
 * Returns the maximum machine frame number of mapped RAM in this system.
 * This command always succeeds (it never returns an error code).
 * arg == NULL.
 */
#define XENMEM_maximum_ram_page     2

/*
 * Returns the current or maximum memory reservation, in pages, of the
 * specified domain (may be DOMID_SELF). Returns -ve errcode on failure.
 * arg == addr of domid_t.
 */
#define XENMEM_current_reservation  3
#define XENMEM_maximum_reservation  4

/*
 * Returns a list of MFN bases of 2MB extents comprising the machine_to_phys
 * mapping table. Architectures which do not have a m2p table do not implement
 * this command.
 * arg == addr of xen_machphys_mfn_list_t.
 */
#define XENMEM_machphys_mfn_list    5
struct xen_machphys_mfn_list {
	/*
	 * Size of the 'extent_start' array. Fewer entries will be filled if the
	 * machphys table is smaller than max_extents * 2MB.
	 */
	unsigned int max_extents;

	/*
	 * Pointer to buffer to fill with list of extent starts. If there are
	 * any large discontiguities in the machine address space, 2MB gaps in
	 * the machphys table will be represented by an MFN base of zero.
	 */
	GUEST_HANDLE(ulong) extent_start;

	/*
	 * Number of extents written to the above array. This will be smaller
	 * than 'max_extents' if the machphys table is smaller than max_e * 2MB.
	 */
	unsigned int nr_extents;
};
DEFINE_GUEST_HANDLE_STRUCT(xen_machphys_mfn_list);

/*
 * Sets the GPFN at which a particular page appears in the specified guest's
 * pseudophysical address space.
 * arg == addr of xen_add_to_physmap_t.
 */
#define XENMEM_add_to_physmap       7
struct xen_add_to_physmap {
	/* Which domain to change the mapping for. */
	domid_t domid;

	/* Source mapping space. */
#define XENMAPSPACE_shared_info 0 /* shared info page */
#define XENMAPSPACE_grant_table 1 /* grant table page */
	unsigned int space;

	/* Index into source mapping space. */
	unsigned long idx;

	/* GPFN where the source mapping page should appear. */
	unsigned long gpfn;
};
DEFINE_GUEST_HANDLE_STRUCT(xen_add_to_physmap);

/*
 * Translates a list of domain-specific GPFNs into MFNs. Returns a -ve error
 * code on failure. This call only works for auto-translated guests.
 */
#define XENMEM_translate_gpfn_list  8
struct xen_translate_gpfn_list {
	/* Which domain to translate for? */
	domid_t domid;

	/* Length of list. */
	unsigned long nr_gpfns;

	/* List of GPFNs to translate. */
	GUEST_HANDLE(ulong) gpfn_list;

	/*
	 * Output list to contain MFN translations. May be the same as the input
	 * list (in which case each input GPFN is overwritten with the output MFN).
	 */
	GUEST_HANDLE(ulong) mfn_list;
};
DEFINE_GUEST_HANDLE_STRUCT(xen_translate_gpfn_list);

#endif /* __XEN_PUBLIC_MEMORY_H__ */

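A sketch of handing one guest frame back to Xen with XENMEM_decrease_reservation. set_xen_guest_handle(), HYPERVISOR_memory_op() and DOMID_SELF are assumed to come from the surrounding Xen support code, not from this header; only the structure and command number above are from memory.h.

static int example_return_frame(unsigned long gmfn)
{
	struct xen_memory_reservation reservation = {
		.nr_extents   = 1,
		.extent_order = 0,		/* a single page */
		.domid        = DOMID_SELF,	/* assumed constant */
	};

	set_xen_guest_handle(reservation.extent_start, &gmfn);

	/* Returns the number of extents freed; 1 on success. */
	return HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation);
}
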
@@ -0,0 +1,145 @@
/*
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#ifndef __XEN_PUBLIC_PHYSDEV_H__
#define __XEN_PUBLIC_PHYSDEV_H__

/*
 * Prototype for this hypercall is:
 *  int physdev_op(int cmd, void *args)
 * @cmd  == PHYSDEVOP_??? (physdev operation).
 * @args == Operation-specific extra arguments (NULL if none).
 */

/*
 * Notify end-of-interrupt (EOI) for the specified IRQ.
 * @arg == pointer to physdev_eoi structure.
 */
#define PHYSDEVOP_eoi			12
struct physdev_eoi {
	/* IN */
	uint32_t irq;
};

/*
 * Query the status of an IRQ line.
 * @arg == pointer to physdev_irq_status_query structure.
 */
#define PHYSDEVOP_irq_status_query	 5
struct physdev_irq_status_query {
	/* IN */
	uint32_t irq;
	/* OUT */
	uint32_t flags; /* XENIRQSTAT_* */
};

/* Need to call PHYSDEVOP_eoi when the IRQ has been serviced? */
#define _XENIRQSTAT_needs_eoi	(0)
#define XENIRQSTAT_needs_eoi	(1U<<_XENIRQSTAT_needs_eoi)

/* IRQ shared by multiple guests? */
#define _XENIRQSTAT_shared	(1)
#define XENIRQSTAT_shared	(1U<<_XENIRQSTAT_shared)

/*
 * Set the current VCPU's I/O privilege level.
 * @arg == pointer to physdev_set_iopl structure.
 */
#define PHYSDEVOP_set_iopl		 6
struct physdev_set_iopl {
	/* IN */
	uint32_t iopl;
};

/*
 * Set the current VCPU's I/O-port permissions bitmap.
 * @arg == pointer to physdev_set_iobitmap structure.
 */
#define PHYSDEVOP_set_iobitmap		 7
struct physdev_set_iobitmap {
	/* IN */
	uint8_t *bitmap;
	uint32_t nr_ports;
};

/*
 * Read or write an IO-APIC register.
 * @arg == pointer to physdev_apic structure.
 */
#define PHYSDEVOP_apic_read		 8
#define PHYSDEVOP_apic_write		 9
struct physdev_apic {
	/* IN */
	unsigned long apic_physbase;
	uint32_t reg;
	/* IN or OUT */
	uint32_t value;
};

/*
 * Allocate or free a physical upcall vector for the specified IRQ line.
 * @arg == pointer to physdev_irq structure.
 */
#define PHYSDEVOP_alloc_irq_vector	10
#define PHYSDEVOP_free_irq_vector	11
struct physdev_irq {
	/* IN */
	uint32_t irq;
	/* IN or OUT */
	uint32_t vector;
};

/*
 * Argument to physdev_op_compat() hypercall. Superseded by new physdev_op()
 * hypercall since 0x00030202.
 */
struct physdev_op {
	uint32_t cmd;
	union {
		struct physdev_irq_status_query irq_status_query;
		struct physdev_set_iopl         set_iopl;
		struct physdev_set_iobitmap     set_iobitmap;
		struct physdev_apic             apic_op;
		struct physdev_irq              irq_op;
	} u;
};

/*
 * Notify that some PIRQ-bound event channels have been unmasked.
 * ** This command is obsolete since interface version 0x00030202 and is **
 * ** unsupported by newer versions of Xen. **
 */
#define PHYSDEVOP_IRQ_UNMASK_NOTIFY	 4

/*
 * These all-capitals physdev operation names are superseded by the new names
 * (defined above) since interface version 0x00030202.
 */
#define PHYSDEVOP_IRQ_STATUS_QUERY	PHYSDEVOP_irq_status_query
#define PHYSDEVOP_SET_IOPL		PHYSDEVOP_set_iopl
#define PHYSDEVOP_SET_IOBITMAP		PHYSDEVOP_set_iobitmap
#define PHYSDEVOP_APIC_READ		PHYSDEVOP_apic_read
#define PHYSDEVOP_APIC_WRITE		PHYSDEVOP_apic_write
#define PHYSDEVOP_ASSIGN_VECTOR		PHYSDEVOP_alloc_irq_vector
#define PHYSDEVOP_FREE_VECTOR		PHYSDEVOP_free_irq_vector
#define PHYSDEVOP_IRQ_NEEDS_UNMASK_NOTIFY	XENIRQSTAT_needs_eoi
#define PHYSDEVOP_IRQ_SHARED			XENIRQSTAT_shared

#endif /* __XEN_PUBLIC_PHYSDEV_H__ */

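A sketch of raising the calling VCPU's I/O privilege level. HYPERVISOR_physdev_op() is the assumed hypercall wrapper from the Xen support code; the command number and argument structure are the ones defined above.

static int example_set_iopl(unsigned int new_iopl)
{
	struct physdev_set_iopl set_iopl = {
		.iopl = new_iopl,	/* 0..3, as for the native iopl */
	};

	return HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl);
}
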
@@ -0,0 +1,77 @@
/******************************************************************************
 * sched.h
 *
 * Scheduler state interactions
 *
 * Copyright (c) 2005, Keir Fraser <keir@xensource.com>
 */

#ifndef __XEN_PUBLIC_SCHED_H__
#define __XEN_PUBLIC_SCHED_H__

#include "event_channel.h"

/*
 * The prototype for this hypercall is:
 *  long sched_op_new(int cmd, void *arg)
 * @cmd == SCHEDOP_??? (scheduler operation).
 * @arg == Operation-specific extra argument(s), as described below.
 *
 * **NOTE**:
 * Versions of Xen prior to 3.0.2 provide only the following legacy version
 * of this hypercall, supporting only the commands yield, block and shutdown:
 *  long sched_op(int cmd, unsigned long arg)
 * @cmd == SCHEDOP_??? (scheduler operation).
 * @arg == 0               (SCHEDOP_yield and SCHEDOP_block)
 *      == SHUTDOWN_* code (SCHEDOP_shutdown)
 */

/*
 * Voluntarily yield the CPU.
 * @arg == NULL.
 */
#define SCHEDOP_yield		0

/*
 * Block execution of this VCPU until an event is received for processing.
 * If called with event upcalls masked, this operation will atomically
 * reenable event delivery and check for pending events before blocking the
 * VCPU. This avoids a "wakeup waiting" race.
 * @arg == NULL.
 */
#define SCHEDOP_block		1

/*
 * Halt execution of this domain (all VCPUs) and notify the system controller.
 * @arg == pointer to sched_shutdown structure.
 */
#define SCHEDOP_shutdown	2
struct sched_shutdown {
	unsigned int reason; /* SHUTDOWN_* */
};
DEFINE_GUEST_HANDLE_STRUCT(sched_shutdown);

/*
 * Poll a set of event-channel ports. Return when one or more are pending. An
 * optional timeout may be specified.
 * @arg == pointer to sched_poll structure.
 */
#define SCHEDOP_poll		3
struct sched_poll {
	GUEST_HANDLE(evtchn_port_t) ports;
	unsigned int nr_ports;
	uint64_t timeout;
};
DEFINE_GUEST_HANDLE_STRUCT(sched_poll);

/*
 * Reason codes for SCHEDOP_shutdown. These may be interpreted by control
 * software to determine the appropriate action. For the most part, Xen does
 * not care about the shutdown code.
 */
#define SHUTDOWN_poweroff	0 /* Domain exited normally. Clean up and kill. */
#define SHUTDOWN_reboot		1 /* Clean up, kill, and then restart.          */
#define SHUTDOWN_suspend	2 /* Clean up, save suspend info, kill.          */
#define SHUTDOWN_crash		3 /* Tell controller we've crashed.              */

#endif /* __XEN_PUBLIC_SCHED_H__ */

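A sketch of asking the system controller for a reboot. HYPERVISOR_sched_op() is the assumed hypercall wrapper; SCHEDOP_shutdown and the reason codes are the ones defined above.

static void example_request_reboot(void)
{
	struct sched_shutdown arg = {
		.reason = SHUTDOWN_reboot,
	};

	/* The domain is torn down and restarted by the controller. */
	HYPERVISOR_sched_op(SCHEDOP_shutdown, &arg);
}
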
@@ -0,0 +1,167 @@
/******************************************************************************
 * vcpu.h
 *
 * VCPU initialisation, query, and hotplug.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 * Copyright (c) 2005, Keir Fraser <keir@xensource.com>
 */

#ifndef __XEN_PUBLIC_VCPU_H__
#define __XEN_PUBLIC_VCPU_H__

/*
 * Prototype for this hypercall is:
 *  int vcpu_op(int cmd, int vcpuid, void *extra_args)
 * @cmd        == VCPUOP_??? (VCPU operation).
 * @vcpuid     == VCPU to operate on.
 * @extra_args == Operation-specific extra arguments (NULL if none).
 */

/*
 * Initialise a VCPU. Each VCPU can be initialised only once. A
 * newly-initialised VCPU will not run until it is brought up by VCPUOP_up.
 *
 * @extra_arg == pointer to vcpu_guest_context structure containing initial
 *               state for the VCPU.
 */
#define VCPUOP_initialise	0

/*
 * Bring up a VCPU. This makes the VCPU runnable. This operation will fail
 * if the VCPU has not been initialised (VCPUOP_initialise).
 */
#define VCPUOP_up		1

/*
 * Bring down a VCPU (i.e., make it non-runnable).
 * There are a few caveats that callers should observe:
 *  1. This operation may return, and VCPU_is_up may return false, before the
 *     VCPU stops running (i.e., the command is asynchronous). It is a good
 *     idea to ensure that the VCPU has entered a non-critical loop before
 *     bringing it down. Alternatively, this operation is guaranteed
 *     synchronous if invoked by the VCPU itself.
 *  2. After a VCPU is initialised, there is currently no way to drop all its
 *     references to domain memory. Even a VCPU that is down still holds
 *     memory references via its pagetable base pointer and GDT. It is good
 *     practice to move a VCPU onto an 'idle' or default page table, LDT and
 *     GDT before bringing it down.
 */
#define VCPUOP_down		2

/* Returns 1 if the given VCPU is up. */
#define VCPUOP_is_up		3

/*
 * Return information about the state and running time of a VCPU.
 * @extra_arg == pointer to vcpu_runstate_info structure.
 */
#define VCPUOP_get_runstate_info 4
struct vcpu_runstate_info {
	/* VCPU's current state (RUNSTATE_*). */
	int state;
	/* When was current state entered (system time, ns)? */
	uint64_t state_entry_time;
	/*
	 * Time spent in each RUNSTATE_* (ns). The sum of these times is
	 * guaranteed not to drift from system time.
	 */
	uint64_t time[4];
};

/* VCPU is currently running on a physical CPU. */
#define RUNSTATE_running  0

/* VCPU is runnable, but not currently scheduled on any physical CPU. */
#define RUNSTATE_runnable 1

/* VCPU is blocked (a.k.a. idle). It is therefore not runnable. */
#define RUNSTATE_blocked  2

/*
 * VCPU is not runnable, but it is not blocked.
 * This is a 'catch all' state for things like hotplug and pauses by the
 * system administrator (or for critical sections in the hypervisor).
 * RUNSTATE_blocked dominates this state (it is the preferred state).
 */
#define RUNSTATE_offline  3

/*
 * Register a shared memory area from which the guest may obtain its own
 * runstate information without needing to execute a hypercall.
 * Notes:
 *  1. The registered address may be virtual or physical, depending on the
 *     platform. The virtual address should be registered on x86 systems.
 *  2. Only one shared area may be registered per VCPU. The shared area is
 *     updated by the hypervisor each time the VCPU is scheduled. Thus
 *     runstate.state will always be RUNSTATE_running and
 *     runstate.state_entry_time will indicate the system time at which the
 *     VCPU was last scheduled to run.
 * @extra_arg == pointer to vcpu_register_runstate_memory_area structure.
 */
#define VCPUOP_register_runstate_memory_area 5
struct vcpu_register_runstate_memory_area {
	union {
		struct vcpu_runstate_info *v;
		uint64_t p;
	} addr;
};

/*
 * Set or stop a VCPU's periodic timer. Every VCPU has one periodic timer
 * which can be set via these commands. Periods smaller than one millisecond
 * may not be supported.
 */
#define VCPUOP_set_periodic_timer	 6 /* arg == vcpu_set_periodic_timer_t */
#define VCPUOP_stop_periodic_timer	 7 /* arg == NULL */
struct vcpu_set_periodic_timer {
	uint64_t period_ns;
};

/*
 * Set or stop a VCPU's single-shot timer. Every VCPU has one single-shot
 * timer which can be set via these commands.
 */
#define VCPUOP_set_singleshot_timer	 8 /* arg == vcpu_set_singleshot_timer_t */
#define VCPUOP_stop_singleshot_timer	 9 /* arg == NULL */
struct vcpu_set_singleshot_timer {
	uint64_t timeout_abs_ns;
	uint32_t flags;	 /* VCPU_SSHOTTMR_??? */
};

/* Flags to VCPUOP_set_singleshot_timer. */
/* Require the timeout to be in the future (return -ETIME if it's passed). */
#define _VCPU_SSHOTTMR_future (0)
#define VCPU_SSHOTTMR_future  (1U << _VCPU_SSHOTTMR_future)

/*
 * Register a memory location in the guest address space for the
 * vcpu_info structure. This allows the guest to place the vcpu_info
 * structure in a convenient place, such as in a per-cpu data area.
 * The pointer need not be page aligned, but the structure must not
 * cross a page boundary.
 */
#define VCPUOP_register_vcpu_info 10 /* arg == struct vcpu_info */
struct vcpu_register_vcpu_info {
	uint32_t mfn;    /* mfn of page to place vcpu_info */
	uint32_t offset; /* offset within page */
};

#endif /* __XEN_PUBLIC_VCPU_H__ */

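A sketch of placing a vcpu_info structure into per-cpu memory with VCPUOP_register_vcpu_info. HYPERVISOR_vcpu_op(), virt_to_mfn() and offset_in_page() are assumed from the surrounding Xen/Linux support code, and struct vcpu_info comes from the main Xen interface header; only the command and argument structure are from vcpu.h above.

static int example_register_vcpu_info(int cpu, struct vcpu_info *vcpup)
{
	struct vcpu_register_vcpu_info info = {
		.mfn    = virt_to_mfn(vcpup),		/* machine frame holding vcpup */
		.offset = offset_in_page(vcpup),	/* location within that page */
	};

	/* After this succeeds, Xen delivers event state through *vcpup. */
	return HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_info, cpu, &info);
}
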
Some files were not shown because too many files have changed in this diff.