OpenCloudOS-Kernel/arch/tile/include/hv/iorpc.h

715 lines
25 KiB
C

/*
* Copyright 2012 Tilera Corporation. All Rights Reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation, version 2.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
* NON INFRINGEMENT. See the GNU General Public License for
* more details.
*/
#ifndef _HV_IORPC_H_
#define _HV_IORPC_H_
/**
*
* Error codes and struct definitions for the IO RPC library.
*
* The hypervisor's IO RPC component provides a convenient way for
* driver authors to proxy system calls between user space, linux, and
* the hypervisor driver. The core of the system is a set of Python
* files that take ".idl" files as input and generates the following
* source code:
*
* - _rpc_call() routines for use in userspace IO libraries. These
* routines take an argument list specified in the .idl file, pack the
* arguments in to a buffer, and read or write that buffer via the
* Linux iorpc driver.
*
* - dispatch_read() and dispatch_write() routines that hypervisor
* drivers can use to implement most of their dev_pread() and
* dev_pwrite() methods. These routines decode the incoming parameter
* blob, permission check and translate parameters where appropriate,
* and then invoke a callback routine for whichever RPC call has
* arrived. The driver simply implements the set of callback
* routines.
*
* The IO RPC system also includes the Linux 'iorpc' driver, which
* proxies calls between the userspace library and the hypervisor
* driver. The Linux driver is almost entirely device agnostic; it
* watches for special flags indicating cases where a memory buffer
* address might need to be translated, etc. As a result, driver
* writers can avoid many of the problem cases related to registering
* hardware resources like memory pages or interrupts. However, the
* drivers must be careful to obey the conventions documented below in
* order to work properly with the generic Linux iorpc driver.
*
* @section iorpc_domains Service Domains
*
* All iorpc-based drivers must support a notion of service domains.
* A service domain is basically an application context - state
* indicating resources that are allocated to that particular app
* which it may access and (perhaps) other applications may not
* access. Drivers can support any number of service domains they
* choose. In some cases the design is limited by a number of service
* domains supported by the IO hardware; in other cases the service
* domains are a purely software concept and the driver chooses a
* maximum number of domains based on how much state memory it is
* willing to preallocate.
*
* For example, the mPIPE driver only supports as many service domains
* as are supported by the mPIPE hardware. This limitation is
* required because the hardware implements its own MMIO protection
* scheme to allow large MMIO mappings while still protecting small
* register ranges within the page that should only be accessed by the
* hypervisor.
*
* In contrast, drivers with no hardware service domain limitations
* (for instance the TRIO shim) can implement an arbitrary number of
* service domains. In these cases, each service domain is limited to
* a carefully restricted set of legal MMIO addresses if necessary to
* keep one application from corrupting another application's state.
*
* @section iorpc_conventions System Call Conventions
*
* The driver's open routine is responsible for allocating a new
* service domain for each hv_dev_open() call. By convention, the
* return value from open() should be the service domain number on
* success, or GXIO_ERR_NO_SVC_DOM if no more service domains are
* available.
*
* The implementations of hv_dev_pread() and hv_dev_pwrite() are
* responsible for validating the devhdl value passed up by the
* client. Since the device handle returned by hv_dev_open() should
* embed the positive service domain number, drivers should make sure
* that DRV_HDL2BITS(devhdl) is a legal service domain. If the client
* passes an illegal service domain number, the routine should return
* GXIO_ERR_INVAL_SVC_DOM. Once the service domain number has been
* validated, the driver can copy to/from the client buffer and call
* the dispatch_read() or dispatch_write() methods created by the RPC
* generator.
*
* The hv_dev_close() implementation should reset all service domain
* state and put the service domain back on a free list for
* reallocation by a future application. In most cases, this will
* require executing a hardware reset or drain flow and denying any
* MMIO regions that were created for the service domain.
*
* @section iorpc_data Special Data Types
*
* The .idl file syntax allows the creation of syscalls with special
* parameters that require permission checks or translations as part
* of the system call path. Because of limitations in the code
* generator, APIs are generally limited to just one of these special
* parameters per system call, and they are sometimes required to be
* the first or last parameter to the call. Special parameters
* include:
*
* @subsection iorpc_mem_buffer MEM_BUFFER
*
* The MEM_BUFFER() datatype allows user space to "register" memory
* buffers with a device. Registering memory accomplishes two tasks:
* Linux keeps track of all buffers that might be modified by a
* hardware device, and the hardware device drivers bind registered
* buffers to particular hardware resources like ingress NotifRings.
* The MEM_BUFFER() idl syntax can take extra flags like ALIGN_64KB,
* ALIGN_SELF_SIZE, and FLAGS indicating that memory buffers must have
* certain alignment or that the user should be able to pass a "memory
* flags" word specifying attributes like nt_hint or IO cache pinning.
* The parser will accept multiple MEM_BUFFER() flags.
*
* Implementations must obey the following conventions when
* registering memory buffers via the iorpc flow. These rules are a
* result of the Linux driver implementation, which needs to keep
* track of how many times a particular page has been registered with
* the hardware so that it can release the page when all those
* registrations are cleared.
*
* - Memory registrations that refer to a resource which has already
* been bound must return GXIO_ERR_ALREADY_INIT. Thus, it is an
* error to register memory twice without resetting (i.e. closing) the
* resource in between. This convention keeps the Linux driver from
* having to track which particular devices a page is bound to.
*
* - At present, a memory registration is only cleared when the
* service domain is reset. In this case, the Linux driver simply
* closes the HV device file handle and then decrements the reference
* counts of all pages that were previously registered with the
* device.
*
* - In the future, we may add a mechanism for unregistering memory.
* One possible implementation would require that the user specify
* which buffer is currently registered. The HV would then verify
* that that page was actually the one currently mapped and return
* success or failure to Linux, which would then only decrement the
* page reference count if the addresses were mapped. Another scheme
* might allow Linux to pass a token to the HV to be returned when the
* resource is unmapped.
*
* @subsection iorpc_interrupt INTERRUPT
*
* The INTERRUPT .idl datatype allows the client to bind hardware
* interrupts to a particular combination of IPI parameters - CPU, IPI
* PL, and event bit number. This data is passed via a special
* datatype so that the Linux driver can validate the CPU and PL and
* the HV generic iorpc code can translate client CPUs to real CPUs.
*
* @subsection iorpc_pollfd_setup POLLFD_SETUP
*
* The POLLFD_SETUP .idl datatype allows the client to set up hardware
* interrupt bindings which are received by Linux but which are made
* visible to user processes as state transitions on a file descriptor;
* this allows user processes to use Linux primitives, such as poll(), to
* await particular hardware events. This data is passed via a special
* datatype so that the Linux driver may recognize the pollable file
* descriptor and translate it to a set of interrupt target information,
* and so that the HV generic iorpc code can translate client CPUs to real
* CPUs.
*
* @subsection iorpc_pollfd POLLFD
*
* The POLLFD .idl datatype allows manipulation of hardware interrupt
* bindings set up via the POLLFD_SETUP datatype; common operations are
* resetting the state of the requested interrupt events, and unbinding any
* bound interrupts. This data is passed via a special datatype so that
* the Linux driver may recognize the pollable file descriptor and
* translate it to an interrupt identifier previously supplied by the
* hypervisor as the result of an earlier pollfd_setup operation.
*
* @subsection iorpc_blob BLOB
*
* The BLOB .idl datatype allows the client to write an arbitrary
* length string of bytes up to the hypervisor driver. This can be
* useful for passing up large, arbitrarily structured data like
* classifier programs. The iorpc stack takes care of validating the
* buffer VA and CPA as the data passes up to the hypervisor. Unlike
* MEM_BUFFER(), the buffer is not registered - Linux does not bump
* page refcounts and the HV driver should not reuse the buffer once
* the system call is complete.
*
* @section iorpc_translation Translating User Space Calls
*
* The ::iorpc_offset structure describes the formatting of the offset
* that is passed to pread() or pwrite() as part of the generated RPC code.
* When the user calls up to Linux, the rpc code fills in all the fields of
* the offset, including a 16-bit opcode, a 16 bit format indicator, and 32
* bits of user-specified "sub-offset". The opcode indicates which syscall
* is being requested. The format indicates whether there is a "prefix
* struct" at the start of the memory buffer passed to pwrite(), and if so
* what data is in that prefix struct. These prefix structs are used to
* implement special datatypes like MEM_BUFFER() and INTERRUPT - we arrange
* to put data that needs translation and permission checks at the start of
* the buffer so that the Linux driver and generic portions of the HV iorpc
* code can easily access the data. The 32 bits of user-specified
* "sub-offset" are most useful for pread() calls where the user needs to
* also pass in a few bits indicating which register to read, etc.
*
* The Linux iorpc driver watches for system calls that contain prefix
* structs so that it can translate parameters and bump reference
* counts as appropriate. It does not (currently) have any knowledge
* of the per-device opcodes - it doesn't care what operation you're
* doing to mPIPE, so long as it can do all the generic book-keeping.
* The hv/iorpc.h header file defines all of the generic encoding bits
* needed to translate iorpc calls without knowing which particular
* opcode is being issued.
*
* @section iorpc_globals Global iorpc Calls
*
* Implementing mmap() required adding some special iorpc syscalls
* that are only called by the Linux driver, never by userspace.
* These include get_mmio_base() and check_mmio_offset(). These
* routines are described in globals.idl and must be included in every
* iorpc driver. By providing these routines in every driver, Linux's
* mmap implementation can easily get the PTE bits it needs and
* validate the PA offset without needing to know the per-device
* opcodes to perform those tasks.
*
* @section iorpc_kernel Supporting gxio APIs in the Kernel
*
* The iorpc code generator also supports generation of kernel code
* implementing the gxio APIs. This capability is currently used by
* the mPIPE network driver, and will likely be used by the TRIO root
* complex and endpoint drivers and perhaps an in-kernel crypto
* driver. Each driver that wants to instantiate iorpc calls in the
* kernel needs to generate a kernel version of the generate rpc code
* and (probably) copy any related gxio source files into the kernel.
* The mPIPE driver provides a good example of this pattern.
*/
#ifdef __KERNEL__
#include <linux/stddef.h>
#else
#include <stddef.h>
#endif
#if defined(__HV__)
#include <hv/hypervisor.h>
#elif defined(__KERNEL__)
#include <hv/hypervisor.h>
#include <linux/types.h>
#else
#include <stdint.h>
#endif
/** Code indicating translation services required within the RPC path.
* These indicate whether there is a translatable struct at the start
* of the RPC buffer and what information that struct contains.
*/
enum iorpc_format_e
{
/** No translation required, no prefix struct. */
IORPC_FORMAT_NONE,
/** No translation required, no prefix struct, no access to this
* operation from user space. */
IORPC_FORMAT_NONE_NOUSER,
/** Prefix struct contains user VA and size. */
IORPC_FORMAT_USER_MEM,
/** Prefix struct contains CPA, size, and homing bits. */
IORPC_FORMAT_KERNEL_MEM,
/** Prefix struct contains interrupt. */
IORPC_FORMAT_KERNEL_INTERRUPT,
/** Prefix struct contains user-level interrupt. */
IORPC_FORMAT_USER_INTERRUPT,
/** Prefix struct contains pollfd_setup (interrupt information). */
IORPC_FORMAT_KERNEL_POLLFD_SETUP,
/** Prefix struct contains user-level pollfd_setup (file descriptor). */
IORPC_FORMAT_USER_POLLFD_SETUP,
/** Prefix struct contains pollfd (interrupt cookie). */
IORPC_FORMAT_KERNEL_POLLFD,
/** Prefix struct contains user-level pollfd (file descriptor). */
IORPC_FORMAT_USER_POLLFD,
};
/** Generate an opcode given format and code. */
#define IORPC_OPCODE(FORMAT, CODE) (((FORMAT) << 16) | (CODE))
/** The offset passed through the read() and write() system calls
combines an opcode with 32 bits of user-specified offset. */
union iorpc_offset
{
#ifndef __BIG_ENDIAN__
uint64_t offset; /**< All bits. */
struct
{
uint16_t code; /**< RPC code. */
uint16_t format; /**< iorpc_format_e */
uint32_t sub_offset; /**< caller-specified offset. */
};
uint32_t opcode; /**< Opcode combines code & format. */
#else
uint64_t offset; /**< All bits. */
struct
{
uint32_t sub_offset; /**< caller-specified offset. */
uint16_t format; /**< iorpc_format_e */
uint16_t code; /**< RPC code. */
};
struct
{
uint32_t padding;
uint32_t opcode; /**< Opcode combines code & format. */
};
#endif
};
/** Homing and cache hinting bits that can be used by IO devices. */
struct iorpc_mem_attr
{
unsigned int lotar_x:4; /**< lotar X bits (or Gx page_mask). */
unsigned int lotar_y:4; /**< lotar Y bits (or Gx page_offset). */
unsigned int hfh:1; /**< Uses hash-for-home. */
unsigned int nt_hint:1; /**< Non-temporal hint. */
unsigned int io_pin:1; /**< Only fill 'IO' cache ways. */
};
/** Set the nt_hint bit. */
#define IORPC_MEM_BUFFER_FLAG_NT_HINT (1 << 0)
/** Set the IO pin bit. */
#define IORPC_MEM_BUFFER_FLAG_IO_PIN (1 << 1)
/** A structure used to describe memory registration. Different
protection levels describe memory differently, so this union
contains all the different possible descriptions. As a request
moves up the call chain, each layer translates from one
description format to the next. In particular, the Linux iorpc
driver translates user VAs into CPAs and homing parameters. */
union iorpc_mem_buffer
{
struct
{
uint64_t va; /**< User virtual address. */
uint64_t size; /**< Buffer size. */
unsigned int flags; /**< nt_hint, IO pin. */
}
user; /**< Buffer as described by user apps. */
struct
{
unsigned long long cpa; /**< Client physical address. */
#if defined(__KERNEL__) || defined(__HV__)
size_t size; /**< Buffer size. */
HV_PTE pte; /**< PTE describing memory homing. */
#else
uint64_t size;
uint64_t pte;
#endif
unsigned int flags; /**< nt_hint, IO pin. */
}
kernel; /**< Buffer as described by kernel. */
struct
{
unsigned long long pa; /**< Physical address. */
size_t size; /**< Buffer size. */
struct iorpc_mem_attr attr; /**< Homing and locality hint bits. */
}
hv; /**< Buffer parameters for HV driver. */
};
/** A structure used to describe interrupts. The format differs slightly
* for user and kernel interrupts. As with the mem_buffer_t, translation
* between the formats is done at each level. */
union iorpc_interrupt
{
struct
{
int cpu; /**< CPU. */
int event; /**< evt_num */
}
user; /**< Interrupt as described by user applications. */
struct
{
int x; /**< X coord. */
int y; /**< Y coord. */
int ipi; /**< int_num */
int event; /**< evt_num */
}
kernel; /**< Interrupt as described by the kernel. */
};
/** A structure used to describe interrupts used with poll(). The format
* differs significantly for requests from user to kernel, and kernel to
* hypervisor. As with the mem_buffer_t, translation between the formats
* is done at each level. */
union iorpc_pollfd_setup
{
struct
{
int fd; /**< Pollable file descriptor. */
}
user; /**< pollfd_setup as described by user applications. */
struct
{
int x; /**< X coord. */
int y; /**< Y coord. */
int ipi; /**< int_num */
int event; /**< evt_num */
}
kernel; /**< pollfd_setup as described by the kernel. */
};
/** A structure used to describe previously set up interrupts used with
* poll(). The format differs significantly for requests from user to
* kernel, and kernel to hypervisor. As with the mem_buffer_t, translation
* between the formats is done at each level. */
union iorpc_pollfd
{
struct
{
int fd; /**< Pollable file descriptor. */
}
user; /**< pollfd as described by user applications. */
struct
{
int cookie; /**< hv cookie returned by the pollfd_setup operation. */
}
kernel; /**< pollfd as described by the kernel. */
};
/** The various iorpc devices use error codes from -1100 to -1299.
*
* This range is distinct from netio (-700 to -799), the hypervisor
* (-800 to -899), tilepci (-900 to -999), ilib (-1000 to -1099),
* gxcr (-1300 to -1399) and gxpci (-1400 to -1499).
*/
enum gxio_err_e {
/** Largest iorpc error number. */
GXIO_ERR_MAX = -1101,
/********************************************************/
/* Generic Error Codes */
/********************************************************/
/** Bad RPC opcode - possible version incompatibility. */
GXIO_ERR_OPCODE = -1101,
/** Invalid parameter. */
GXIO_ERR_INVAL = -1102,
/** Memory buffer did not meet alignment requirements. */
GXIO_ERR_ALIGNMENT = -1103,
/** Memory buffers must be coherent and cacheable. */
GXIO_ERR_COHERENCE = -1104,
/** Resource already initialized. */
GXIO_ERR_ALREADY_INIT = -1105,
/** No service domains available. */
GXIO_ERR_NO_SVC_DOM = -1106,
/** Illegal service domain number. */
GXIO_ERR_INVAL_SVC_DOM = -1107,
/** Illegal MMIO address. */
GXIO_ERR_MMIO_ADDRESS = -1108,
/** Illegal interrupt binding. */
GXIO_ERR_INTERRUPT = -1109,
/** Unreasonable client memory. */
GXIO_ERR_CLIENT_MEMORY = -1110,
/** No more IOTLB entries. */
GXIO_ERR_IOTLB_ENTRY = -1111,
/** Invalid memory size. */
GXIO_ERR_INVAL_MEMORY_SIZE = -1112,
/** Unsupported operation. */
GXIO_ERR_UNSUPPORTED_OP = -1113,
/** Insufficient DMA credits. */
GXIO_ERR_DMA_CREDITS = -1114,
/** Operation timed out. */
GXIO_ERR_TIMEOUT = -1115,
/** No such device or object. */
GXIO_ERR_NO_DEVICE = -1116,
/** Device or resource busy. */
GXIO_ERR_BUSY = -1117,
/** I/O error. */
GXIO_ERR_IO = -1118,
/** Permissions error. */
GXIO_ERR_PERM = -1119,
/********************************************************/
/* Test Device Error Codes */
/********************************************************/
/** Illegal register number. */
GXIO_TEST_ERR_REG_NUMBER = -1120,
/** Illegal buffer slot. */
GXIO_TEST_ERR_BUFFER_SLOT = -1121,
/********************************************************/
/* MPIPE Error Codes */
/********************************************************/
/** Invalid buffer size. */
GXIO_MPIPE_ERR_INVAL_BUFFER_SIZE = -1131,
/** Cannot allocate buffer stack. */
GXIO_MPIPE_ERR_NO_BUFFER_STACK = -1140,
/** Invalid buffer stack number. */
GXIO_MPIPE_ERR_BAD_BUFFER_STACK = -1141,
/** Cannot allocate NotifRing. */
GXIO_MPIPE_ERR_NO_NOTIF_RING = -1142,
/** Invalid NotifRing number. */
GXIO_MPIPE_ERR_BAD_NOTIF_RING = -1143,
/** Cannot allocate NotifGroup. */
GXIO_MPIPE_ERR_NO_NOTIF_GROUP = -1144,
/** Invalid NotifGroup number. */
GXIO_MPIPE_ERR_BAD_NOTIF_GROUP = -1145,
/** Cannot allocate bucket. */
GXIO_MPIPE_ERR_NO_BUCKET = -1146,
/** Invalid bucket number. */
GXIO_MPIPE_ERR_BAD_BUCKET = -1147,
/** Cannot allocate eDMA ring. */
GXIO_MPIPE_ERR_NO_EDMA_RING = -1148,
/** Invalid eDMA ring number. */
GXIO_MPIPE_ERR_BAD_EDMA_RING = -1149,
/** Invalid channel number. */
GXIO_MPIPE_ERR_BAD_CHANNEL = -1150,
/** Bad configuration. */
GXIO_MPIPE_ERR_BAD_CONFIG = -1151,
/** Empty iqueue. */
GXIO_MPIPE_ERR_IQUEUE_EMPTY = -1152,
/** Empty rules. */
GXIO_MPIPE_ERR_RULES_EMPTY = -1160,
/** Full rules. */
GXIO_MPIPE_ERR_RULES_FULL = -1161,
/** Corrupt rules. */
GXIO_MPIPE_ERR_RULES_CORRUPT = -1162,
/** Invalid rules. */
GXIO_MPIPE_ERR_RULES_INVALID = -1163,
/** Classifier is too big. */
GXIO_MPIPE_ERR_CLASSIFIER_TOO_BIG = -1170,
/** Classifier is too complex. */
GXIO_MPIPE_ERR_CLASSIFIER_TOO_COMPLEX = -1171,
/** Classifier has bad header. */
GXIO_MPIPE_ERR_CLASSIFIER_BAD_HEADER = -1172,
/** Classifier has bad contents. */
GXIO_MPIPE_ERR_CLASSIFIER_BAD_CONTENTS = -1173,
/** Classifier encountered invalid symbol. */
GXIO_MPIPE_ERR_CLASSIFIER_INVAL_SYMBOL = -1174,
/** Classifier encountered invalid bounds. */
GXIO_MPIPE_ERR_CLASSIFIER_INVAL_BOUNDS = -1175,
/** Classifier encountered invalid relocation. */
GXIO_MPIPE_ERR_CLASSIFIER_INVAL_RELOCATION = -1176,
/** Classifier encountered undefined symbol. */
GXIO_MPIPE_ERR_CLASSIFIER_UNDEF_SYMBOL = -1177,
/********************************************************/
/* TRIO Error Codes */
/********************************************************/
/** Cannot allocate memory map region. */
GXIO_TRIO_ERR_NO_MEMORY_MAP = -1180,
/** Invalid memory map region number. */
GXIO_TRIO_ERR_BAD_MEMORY_MAP = -1181,
/** Cannot allocate scatter queue. */
GXIO_TRIO_ERR_NO_SCATTER_QUEUE = -1182,
/** Invalid scatter queue number. */
GXIO_TRIO_ERR_BAD_SCATTER_QUEUE = -1183,
/** Cannot allocate push DMA ring. */
GXIO_TRIO_ERR_NO_PUSH_DMA_RING = -1184,
/** Invalid push DMA ring index. */
GXIO_TRIO_ERR_BAD_PUSH_DMA_RING = -1185,
/** Cannot allocate pull DMA ring. */
GXIO_TRIO_ERR_NO_PULL_DMA_RING = -1186,
/** Invalid pull DMA ring index. */
GXIO_TRIO_ERR_BAD_PULL_DMA_RING = -1187,
/** Cannot allocate PIO region. */
GXIO_TRIO_ERR_NO_PIO = -1188,
/** Invalid PIO region index. */
GXIO_TRIO_ERR_BAD_PIO = -1189,
/** Cannot allocate ASID. */
GXIO_TRIO_ERR_NO_ASID = -1190,
/** Invalid ASID. */
GXIO_TRIO_ERR_BAD_ASID = -1191,
/********************************************************/
/* MICA Error Codes */
/********************************************************/
/** No such accelerator type. */
GXIO_MICA_ERR_BAD_ACCEL_TYPE = -1220,
/** Cannot allocate context. */
GXIO_MICA_ERR_NO_CONTEXT = -1221,
/** PKA command queue is full, can't add another command. */
GXIO_MICA_ERR_PKA_CMD_QUEUE_FULL = -1222,
/** PKA result queue is empty, can't get a result from the queue. */
GXIO_MICA_ERR_PKA_RESULT_QUEUE_EMPTY = -1223,
/********************************************************/
/* GPIO Error Codes */
/********************************************************/
/** Pin not available. Either the physical pin does not exist, or
* it is reserved by the hypervisor for system usage. */
GXIO_GPIO_ERR_PIN_UNAVAILABLE = -1240,
/** Pin busy. The pin exists, and is available for use via GXIO, but
* it has been attached by some other process or driver. */
GXIO_GPIO_ERR_PIN_BUSY = -1241,
/** Cannot access unattached pin. One or more of the pins being
* manipulated by this call are not attached to the requesting
* context. */
GXIO_GPIO_ERR_PIN_UNATTACHED = -1242,
/** Invalid I/O mode for pin. The wiring of the pin in the system
* is such that the I/O mode or electrical control parameters
* requested could cause damage. */
GXIO_GPIO_ERR_PIN_INVALID_MODE = -1243,
/** Smallest iorpc error number. */
GXIO_ERR_MIN = -1299
};
#endif /* !_HV_IORPC_H_ */