drm/i915/gt: Split intel_ring_submission
Split the legacy submission backend from the common CS ring buffer handling.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20191024100344.5041-1-chris@chris-wilson.co.uk
This commit is contained in:
parent
2c9a49150d
commit
2871ea85c1
|
@ -89,11 +89,12 @@ gt-y += \
|
||||||
gt/intel_gt_requests.o \
|
gt/intel_gt_requests.o \
|
||||||
gt/intel_llc.o \
|
gt/intel_llc.o \
|
||||||
gt/intel_lrc.o \
|
gt/intel_lrc.o \
|
||||||
|
gt/intel_mocs.o \
|
||||||
gt/intel_rc6.o \
|
gt/intel_rc6.o \
|
||||||
gt/intel_renderstate.o \
|
gt/intel_renderstate.o \
|
||||||
gt/intel_reset.o \
|
gt/intel_reset.o \
|
||||||
gt/intel_ringbuffer.o \
|
gt/intel_ring.o \
|
||||||
gt/intel_mocs.o \
|
gt/intel_ring_submission.o \
|
||||||
gt/intel_sseu.o \
|
gt/intel_sseu.o \
|
||||||
gt/intel_timeline.o \
|
gt/intel_timeline.o \
|
||||||
gt/intel_workarounds.o
|
gt/intel_workarounds.o
|
||||||
|
|
|
@ -30,6 +30,7 @@
|
||||||
#include <drm/i915_drm.h>
|
#include <drm/i915_drm.h>
|
||||||
|
|
||||||
#include "gem/i915_gem_pm.h"
|
#include "gem/i915_gem_pm.h"
|
||||||
|
#include "gt/intel_ring.h"
|
||||||
|
|
||||||
#include "i915_drv.h"
|
#include "i915_drv.h"
|
||||||
#include "i915_reg.h"
|
#include "i915_reg.h"
|
||||||
|
|
|
@ -69,9 +69,10 @@
|
||||||
|
|
||||||
#include <drm/i915_drm.h>
|
#include <drm/i915_drm.h>
|
||||||
|
|
||||||
#include "gt/intel_lrc_reg.h"
|
|
||||||
#include "gt/intel_engine_heartbeat.h"
|
#include "gt/intel_engine_heartbeat.h"
|
||||||
#include "gt/intel_engine_user.h"
|
#include "gt/intel_engine_user.h"
|
||||||
|
#include "gt/intel_lrc_reg.h"
|
||||||
|
#include "gt/intel_ring.h"
|
||||||
|
|
||||||
#include "i915_gem_context.h"
|
#include "i915_gem_context.h"
|
||||||
#include "i915_globals.h"
|
#include "i915_globals.h"
|
||||||
|
|
|
@ -19,6 +19,7 @@
|
||||||
#include "gt/intel_engine_pool.h"
|
#include "gt/intel_engine_pool.h"
|
||||||
#include "gt/intel_gt.h"
|
#include "gt/intel_gt.h"
|
||||||
#include "gt/intel_gt_pm.h"
|
#include "gt/intel_gt_pm.h"
|
||||||
|
#include "gt/intel_ring.h"
|
||||||
|
|
||||||
#include "i915_drv.h"
|
#include "i915_drv.h"
|
||||||
#include "i915_gem_clflush.h"
|
#include "i915_gem_clflush.h"
|
||||||
|
|
|
@ -8,6 +8,7 @@
|
||||||
#include "gt/intel_engine_pm.h"
|
#include "gt/intel_engine_pm.h"
|
||||||
#include "gt/intel_engine_pool.h"
|
#include "gt/intel_engine_pool.h"
|
||||||
#include "gt/intel_gt.h"
|
#include "gt/intel_gt.h"
|
||||||
|
#include "gt/intel_ring.h"
|
||||||
#include "i915_gem_clflush.h"
|
#include "i915_gem_clflush.h"
|
||||||
#include "i915_gem_object_blt.h"
|
#include "i915_gem_object_blt.h"
|
||||||
|
|
||||||
|
|
|
@ -8,6 +8,7 @@
|
||||||
|
|
||||||
#include "gt/intel_gt.h"
|
#include "gt/intel_gt.h"
|
||||||
#include "gt/intel_gt_pm.h"
|
#include "gt/intel_gt_pm.h"
|
||||||
|
#include "gt/intel_ring.h"
|
||||||
|
|
||||||
#include "i915_selftest.h"
|
#include "i915_selftest.h"
|
||||||
#include "selftests/i915_random.h"
|
#include "selftests/i915_random.h"
|
||||||
|
|
|
@ -13,6 +13,7 @@
|
||||||
#include "intel_context.h"
|
#include "intel_context.h"
|
||||||
#include "intel_engine.h"
|
#include "intel_engine.h"
|
||||||
#include "intel_engine_pm.h"
|
#include "intel_engine_pm.h"
|
||||||
|
#include "intel_ring.h"
|
||||||
|
|
||||||
static struct i915_global_context {
|
static struct i915_global_context {
|
||||||
struct i915_global base;
|
struct i915_global base;
|
||||||
|
|
|
@ -12,6 +12,7 @@
|
||||||
#include "i915_active.h"
|
#include "i915_active.h"
|
||||||
#include "intel_context_types.h"
|
#include "intel_context_types.h"
|
||||||
#include "intel_engine_types.h"
|
#include "intel_engine_types.h"
|
||||||
|
#include "intel_ring_types.h"
|
||||||
#include "intel_timeline_types.h"
|
#include "intel_timeline_types.h"
|
||||||
|
|
||||||
void intel_context_init(struct intel_context *ce,
|
void intel_context_init(struct intel_context *ce,
|
||||||
|
|
|
@ -19,7 +19,6 @@
|
||||||
#include "intel_workarounds.h"
|
#include "intel_workarounds.h"
|
||||||
|
|
||||||
struct drm_printer;
|
struct drm_printer;
|
||||||
|
|
||||||
struct intel_gt;
|
struct intel_gt;
|
||||||
|
|
||||||
/* Early gen2 devices have a cacheline of just 32 bytes, using 64 is overkill,
|
/* Early gen2 devices have a cacheline of just 32 bytes, using 64 is overkill,
|
||||||
|
@ -176,122 +175,9 @@ intel_write_status_page(struct intel_engine_cs *engine, int reg, u32 value)
|
||||||
#define I915_HWS_CSB_WRITE_INDEX 0x1f
|
#define I915_HWS_CSB_WRITE_INDEX 0x1f
|
||||||
#define CNL_HWS_CSB_WRITE_INDEX 0x2f
|
#define CNL_HWS_CSB_WRITE_INDEX 0x2f
|
||||||
|
|
||||||
struct intel_ring *
|
|
||||||
intel_engine_create_ring(struct intel_engine_cs *engine, int size);
|
|
||||||
int intel_ring_pin(struct intel_ring *ring);
|
|
||||||
void intel_ring_reset(struct intel_ring *ring, u32 tail);
|
|
||||||
unsigned int intel_ring_update_space(struct intel_ring *ring);
|
|
||||||
void intel_ring_unpin(struct intel_ring *ring);
|
|
||||||
void intel_ring_free(struct kref *ref);
|
|
||||||
|
|
||||||
static inline struct intel_ring *intel_ring_get(struct intel_ring *ring)
|
|
||||||
{
|
|
||||||
kref_get(&ring->ref);
|
|
||||||
return ring;
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline void intel_ring_put(struct intel_ring *ring)
|
|
||||||
{
|
|
||||||
kref_put(&ring->ref, intel_ring_free);
|
|
||||||
}
|
|
||||||
|
|
||||||
void intel_engine_stop(struct intel_engine_cs *engine);
|
void intel_engine_stop(struct intel_engine_cs *engine);
|
||||||
void intel_engine_cleanup(struct intel_engine_cs *engine);
|
void intel_engine_cleanup(struct intel_engine_cs *engine);
|
||||||
|
|
||||||
int __must_check intel_ring_cacheline_align(struct i915_request *rq);
|
|
||||||
|
|
||||||
u32 __must_check *intel_ring_begin(struct i915_request *rq, unsigned int n);
|
|
||||||
|
|
||||||
static inline void intel_ring_advance(struct i915_request *rq, u32 *cs)
|
|
||||||
{
|
|
||||||
/* Dummy function.
|
|
||||||
*
|
|
||||||
* This serves as a placeholder in the code so that the reader
|
|
||||||
* can compare against the preceding intel_ring_begin() and
|
|
||||||
* check that the number of dwords emitted matches the space
|
|
||||||
* reserved for the command packet (i.e. the value passed to
|
|
||||||
* intel_ring_begin()).
|
|
||||||
*/
|
|
||||||
GEM_BUG_ON((rq->ring->vaddr + rq->ring->emit) != cs);
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline u32 intel_ring_wrap(const struct intel_ring *ring, u32 pos)
|
|
||||||
{
|
|
||||||
return pos & (ring->size - 1);
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline bool
|
|
||||||
intel_ring_offset_valid(const struct intel_ring *ring,
|
|
||||||
unsigned int pos)
|
|
||||||
{
|
|
||||||
if (pos & -ring->size) /* must be strictly within the ring */
|
|
||||||
return false;
|
|
||||||
|
|
||||||
if (!IS_ALIGNED(pos, 8)) /* must be qword aligned */
|
|
||||||
return false;
|
|
||||||
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline u32 intel_ring_offset(const struct i915_request *rq, void *addr)
|
|
||||||
{
|
|
||||||
/* Don't write ring->size (equivalent to 0) as that hangs some GPUs. */
|
|
||||||
u32 offset = addr - rq->ring->vaddr;
|
|
||||||
GEM_BUG_ON(offset > rq->ring->size);
|
|
||||||
return intel_ring_wrap(rq->ring, offset);
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline void
|
|
||||||
assert_ring_tail_valid(const struct intel_ring *ring, unsigned int tail)
|
|
||||||
{
|
|
||||||
GEM_BUG_ON(!intel_ring_offset_valid(ring, tail));
|
|
||||||
|
|
||||||
/*
|
|
||||||
* "Ring Buffer Use"
|
|
||||||
* Gen2 BSpec "1. Programming Environment" / 1.4.4.6
|
|
||||||
* Gen3 BSpec "1c Memory Interface Functions" / 2.3.4.5
|
|
||||||
* Gen4+ BSpec "1c Memory Interface and Command Stream" / 5.3.4.5
|
|
||||||
* "If the Ring Buffer Head Pointer and the Tail Pointer are on the
|
|
||||||
* same cacheline, the Head Pointer must not be greater than the Tail
|
|
||||||
* Pointer."
|
|
||||||
*
|
|
||||||
* We use ring->head as the last known location of the actual RING_HEAD,
|
|
||||||
* it may have advanced but in the worst case it is equally the same
|
|
||||||
* as ring->head and so we should never program RING_TAIL to advance
|
|
||||||
* into the same cacheline as ring->head.
|
|
||||||
*/
|
|
||||||
#define cacheline(a) round_down(a, CACHELINE_BYTES)
|
|
||||||
GEM_BUG_ON(cacheline(tail) == cacheline(ring->head) &&
|
|
||||||
tail < ring->head);
|
|
||||||
#undef cacheline
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline unsigned int
|
|
||||||
intel_ring_set_tail(struct intel_ring *ring, unsigned int tail)
|
|
||||||
{
|
|
||||||
/* Whilst writes to the tail are strictly order, there is no
|
|
||||||
* serialisation between readers and the writers. The tail may be
|
|
||||||
* read by i915_request_retire() just as it is being updated
|
|
||||||
* by execlists, as although the breadcrumb is complete, the context
|
|
||||||
* switch hasn't been seen.
|
|
||||||
*/
|
|
||||||
assert_ring_tail_valid(ring, tail);
|
|
||||||
ring->tail = tail;
|
|
||||||
return tail;
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline unsigned int
|
|
||||||
__intel_ring_space(unsigned int head, unsigned int tail, unsigned int size)
|
|
||||||
{
|
|
||||||
/*
|
|
||||||
* "If the Ring Buffer Head Pointer and the Tail Pointer are on the
|
|
||||||
* same cacheline, the Head Pointer must not be greater than the Tail
|
|
||||||
* Pointer."
|
|
||||||
*/
|
|
||||||
GEM_BUG_ON(!is_power_of_2(size));
|
|
||||||
return (head - tail - CACHELINE_BYTES) & (size - 1);
|
|
||||||
}
|
|
||||||
|
|
||||||
int intel_engines_init_mmio(struct intel_gt *gt);
|
int intel_engines_init_mmio(struct intel_gt *gt);
|
||||||
int intel_engines_setup(struct intel_gt *gt);
|
int intel_engines_setup(struct intel_gt *gt);
|
||||||
int intel_engines_init(struct intel_gt *gt);
|
int intel_engines_init(struct intel_gt *gt);
|
||||||
|
|
|
@ -37,6 +37,7 @@
|
||||||
#include "intel_context.h"
|
#include "intel_context.h"
|
||||||
#include "intel_lrc.h"
|
#include "intel_lrc.h"
|
||||||
#include "intel_reset.h"
|
#include "intel_reset.h"
|
||||||
|
#include "intel_ring.h"
|
||||||
|
|
||||||
/* Haswell does have the CXT_SIZE register however it does not appear to be
|
/* Haswell does have the CXT_SIZE register however it does not appear to be
|
||||||
* valid. Now, docs explain in dwords what is in the context object. The full
|
* valid. Now, docs explain in dwords what is in the context object. The full
|
||||||
|
|
|
@ -13,6 +13,7 @@
|
||||||
#include "intel_gt.h"
|
#include "intel_gt.h"
|
||||||
#include "intel_gt_pm.h"
|
#include "intel_gt_pm.h"
|
||||||
#include "intel_rc6.h"
|
#include "intel_rc6.h"
|
||||||
|
#include "intel_ring.h"
|
||||||
|
|
||||||
static int __engine_unpark(struct intel_wakeref *wf)
|
static int __engine_unpark(struct intel_wakeref *wf)
|
||||||
{
|
{
|
||||||
|
|
|
@ -59,6 +59,7 @@ struct i915_gem_context;
|
||||||
struct i915_request;
|
struct i915_request;
|
||||||
struct i915_sched_attr;
|
struct i915_sched_attr;
|
||||||
struct intel_gt;
|
struct intel_gt;
|
||||||
|
struct intel_ring;
|
||||||
struct intel_uncore;
|
struct intel_uncore;
|
||||||
|
|
||||||
typedef u8 intel_engine_mask_t;
|
typedef u8 intel_engine_mask_t;
|
||||||
|
@ -77,32 +78,6 @@ struct intel_instdone {
|
||||||
u32 row[I915_MAX_SLICES][I915_MAX_SUBSLICES];
|
u32 row[I915_MAX_SLICES][I915_MAX_SUBSLICES];
|
||||||
};
|
};
|
||||||
|
|
||||||
struct intel_ring {
|
|
||||||
struct kref ref;
|
|
||||||
struct i915_vma *vma;
|
|
||||||
void *vaddr;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* As we have two types of rings, one global to the engine used
|
|
||||||
* by ringbuffer submission and those that are exclusive to a
|
|
||||||
* context used by execlists, we have to play safe and allow
|
|
||||||
* atomic updates to the pin_count. However, the actual pinning
|
|
||||||
* of the context is either done during initialisation for
|
|
||||||
* ringbuffer submission or serialised as part of the context
|
|
||||||
* pinning for execlists, and so we do not need a mutex ourselves
|
|
||||||
* to serialise intel_ring_pin/intel_ring_unpin.
|
|
||||||
*/
|
|
||||||
atomic_t pin_count;
|
|
||||||
|
|
||||||
u32 head;
|
|
||||||
u32 tail;
|
|
||||||
u32 emit;
|
|
||||||
|
|
||||||
u32 space;
|
|
||||||
u32 size;
|
|
||||||
u32 effective_size;
|
|
||||||
};
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* we use a single page to load ctx workarounds so all of these
|
* we use a single page to load ctx workarounds so all of these
|
||||||
* values are referred in terms of dwords
|
* values are referred in terms of dwords
|
||||||
|
|
|
@ -145,6 +145,7 @@
|
||||||
#include "intel_lrc_reg.h"
|
#include "intel_lrc_reg.h"
|
||||||
#include "intel_mocs.h"
|
#include "intel_mocs.h"
|
||||||
#include "intel_reset.h"
|
#include "intel_reset.h"
|
||||||
|
#include "intel_ring.h"
|
||||||
#include "intel_workarounds.h"
|
#include "intel_workarounds.h"
|
||||||
|
|
||||||
#define RING_EXECLIST_QFULL (1 << 0x2)
|
#define RING_EXECLIST_QFULL (1 << 0x2)
|
||||||
|
|
|
@ -26,6 +26,7 @@
|
||||||
#include "intel_gt.h"
|
#include "intel_gt.h"
|
||||||
#include "intel_mocs.h"
|
#include "intel_mocs.h"
|
||||||
#include "intel_lrc.h"
|
#include "intel_lrc.h"
|
||||||
|
#include "intel_ring.h"
|
||||||
|
|
||||||
/* structures required */
|
/* structures required */
|
||||||
struct drm_i915_mocs_entry {
|
struct drm_i915_mocs_entry {
|
||||||
|
|
|
@ -27,6 +27,7 @@
|
||||||
|
|
||||||
#include "i915_drv.h"
|
#include "i915_drv.h"
|
||||||
#include "intel_renderstate.h"
|
#include "intel_renderstate.h"
|
||||||
|
#include "intel_ring.h"
|
||||||
|
|
||||||
struct intel_renderstate {
|
struct intel_renderstate {
|
||||||
const struct intel_renderstate_rodata *rodata;
|
const struct intel_renderstate_rodata *rodata;
|
||||||
|
|
|
@ -0,0 +1,321 @@
|
||||||
|
/*
|
||||||
|
* SPDX-License-Identifier: MIT
|
||||||
|
*
|
||||||
|
* Copyright © 2019 Intel Corporation
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "gem/i915_gem_object.h"
|
||||||
|
#include "i915_drv.h"
|
||||||
|
#include "i915_vma.h"
|
||||||
|
#include "intel_engine.h"
|
||||||
|
#include "intel_ring.h"
|
||||||
|
#include "intel_timeline.h"
|
||||||
|
|
||||||
|
unsigned int intel_ring_update_space(struct intel_ring *ring)
|
||||||
|
{
|
||||||
|
unsigned int space;
|
||||||
|
|
||||||
|
space = __intel_ring_space(ring->head, ring->emit, ring->size);
|
||||||
|
|
||||||
|
ring->space = space;
|
||||||
|
return space;
|
||||||
|
}
|
||||||
|
|
||||||
|
int intel_ring_pin(struct intel_ring *ring)
|
||||||
|
{
|
||||||
|
struct i915_vma *vma = ring->vma;
|
||||||
|
unsigned int flags;
|
||||||
|
void *addr;
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
if (atomic_fetch_inc(&ring->pin_count))
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
flags = PIN_GLOBAL;
|
||||||
|
|
||||||
|
/* Ring wraparound at offset 0 sometimes hangs. No idea why. */
|
||||||
|
flags |= PIN_OFFSET_BIAS | i915_ggtt_pin_bias(vma);
|
||||||
|
|
||||||
|
if (vma->obj->stolen)
|
||||||
|
flags |= PIN_MAPPABLE;
|
||||||
|
else
|
||||||
|
flags |= PIN_HIGH;
|
||||||
|
|
||||||
|
ret = i915_vma_pin(vma, 0, 0, flags);
|
||||||
|
if (unlikely(ret))
|
||||||
|
goto err_unpin;
|
||||||
|
|
||||||
|
if (i915_vma_is_map_and_fenceable(vma))
|
||||||
|
addr = (void __force *)i915_vma_pin_iomap(vma);
|
||||||
|
else
|
||||||
|
addr = i915_gem_object_pin_map(vma->obj,
|
||||||
|
i915_coherent_map_type(vma->vm->i915));
|
||||||
|
if (IS_ERR(addr)) {
|
||||||
|
ret = PTR_ERR(addr);
|
||||||
|
goto err_ring;
|
||||||
|
}
|
||||||
|
|
||||||
|
i915_vma_make_unshrinkable(vma);
|
||||||
|
|
||||||
|
GEM_BUG_ON(ring->vaddr);
|
||||||
|
ring->vaddr = addr;
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
err_ring:
|
||||||
|
i915_vma_unpin(vma);
|
||||||
|
err_unpin:
|
||||||
|
atomic_dec(&ring->pin_count);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Collapse the ring onto a single position: head, tail and emit are all set
 * to @tail (wrapped into the ring) and the cached free space is refreshed.
 */
void intel_ring_reset(struct intel_ring *ring, u32 tail)
{
	const u32 pos = intel_ring_wrap(ring, tail);

	ring->head = pos;
	ring->tail = pos;
	ring->emit = pos;

	intel_ring_update_space(ring);
}
|
||||||
|
|
||||||
|
void intel_ring_unpin(struct intel_ring *ring)
|
||||||
|
{
|
||||||
|
struct i915_vma *vma = ring->vma;
|
||||||
|
|
||||||
|
if (!atomic_dec_and_test(&ring->pin_count))
|
||||||
|
return;
|
||||||
|
|
||||||
|
/* Discard any unused bytes beyond that submitted to hw. */
|
||||||
|
intel_ring_reset(ring, ring->emit);
|
||||||
|
|
||||||
|
i915_vma_unset_ggtt_write(vma);
|
||||||
|
if (i915_vma_is_map_and_fenceable(vma))
|
||||||
|
i915_vma_unpin_iomap(vma);
|
||||||
|
else
|
||||||
|
i915_gem_object_unpin_map(vma->obj);
|
||||||
|
|
||||||
|
GEM_BUG_ON(!ring->vaddr);
|
||||||
|
ring->vaddr = NULL;
|
||||||
|
|
||||||
|
i915_vma_unpin(vma);
|
||||||
|
i915_vma_make_purgeable(vma);
|
||||||
|
}
|
||||||
|
|
||||||
|
static struct i915_vma *create_ring_vma(struct i915_ggtt *ggtt, int size)
|
||||||
|
{
|
||||||
|
struct i915_address_space *vm = &ggtt->vm;
|
||||||
|
struct drm_i915_private *i915 = vm->i915;
|
||||||
|
struct drm_i915_gem_object *obj;
|
||||||
|
struct i915_vma *vma;
|
||||||
|
|
||||||
|
obj = i915_gem_object_create_stolen(i915, size);
|
||||||
|
if (IS_ERR(obj))
|
||||||
|
obj = i915_gem_object_create_internal(i915, size);
|
||||||
|
if (IS_ERR(obj))
|
||||||
|
return ERR_CAST(obj);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Mark ring buffers as read-only from GPU side (so no stray overwrites)
|
||||||
|
* if supported by the platform's GGTT.
|
||||||
|
*/
|
||||||
|
if (vm->has_read_only)
|
||||||
|
i915_gem_object_set_readonly(obj);
|
||||||
|
|
||||||
|
vma = i915_vma_instance(obj, vm, NULL);
|
||||||
|
if (IS_ERR(vma))
|
||||||
|
goto err;
|
||||||
|
|
||||||
|
return vma;
|
||||||
|
|
||||||
|
err:
|
||||||
|
i915_gem_object_put(obj);
|
||||||
|
return vma;
|
||||||
|
}
|
||||||
|
|
||||||
|
struct intel_ring *
|
||||||
|
intel_engine_create_ring(struct intel_engine_cs *engine, int size)
|
||||||
|
{
|
||||||
|
struct drm_i915_private *i915 = engine->i915;
|
||||||
|
struct intel_ring *ring;
|
||||||
|
struct i915_vma *vma;
|
||||||
|
|
||||||
|
GEM_BUG_ON(!is_power_of_2(size));
|
||||||
|
GEM_BUG_ON(RING_CTL_SIZE(size) & ~RING_NR_PAGES);
|
||||||
|
|
||||||
|
ring = kzalloc(sizeof(*ring), GFP_KERNEL);
|
||||||
|
if (!ring)
|
||||||
|
return ERR_PTR(-ENOMEM);
|
||||||
|
|
||||||
|
kref_init(&ring->ref);
|
||||||
|
ring->size = size;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Workaround an erratum on the i830 which causes a hang if
|
||||||
|
* the TAIL pointer points to within the last 2 cachelines
|
||||||
|
* of the buffer.
|
||||||
|
*/
|
||||||
|
ring->effective_size = size;
|
||||||
|
if (IS_I830(i915) || IS_I845G(i915))
|
||||||
|
ring->effective_size -= 2 * CACHELINE_BYTES;
|
||||||
|
|
||||||
|
intel_ring_update_space(ring);
|
||||||
|
|
||||||
|
vma = create_ring_vma(engine->gt->ggtt, size);
|
||||||
|
if (IS_ERR(vma)) {
|
||||||
|
kfree(ring);
|
||||||
|
return ERR_CAST(vma);
|
||||||
|
}
|
||||||
|
ring->vma = vma;
|
||||||
|
|
||||||
|
return ring;
|
||||||
|
}
|
||||||
|
|
||||||
|
void intel_ring_free(struct kref *ref)
|
||||||
|
{
|
||||||
|
struct intel_ring *ring = container_of(ref, typeof(*ring), ref);
|
||||||
|
|
||||||
|
i915_vma_put(ring->vma);
|
||||||
|
kfree(ring);
|
||||||
|
}
|
||||||
|
|
||||||
|
static noinline int
|
||||||
|
wait_for_space(struct intel_ring *ring,
|
||||||
|
struct intel_timeline *tl,
|
||||||
|
unsigned int bytes)
|
||||||
|
{
|
||||||
|
struct i915_request *target;
|
||||||
|
long timeout;
|
||||||
|
|
||||||
|
if (intel_ring_update_space(ring) >= bytes)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
GEM_BUG_ON(list_empty(&tl->requests));
|
||||||
|
list_for_each_entry(target, &tl->requests, link) {
|
||||||
|
if (target->ring != ring)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
/* Would completion of this request free enough space? */
|
||||||
|
if (bytes <= __intel_ring_space(target->postfix,
|
||||||
|
ring->emit, ring->size))
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (GEM_WARN_ON(&target->link == &tl->requests))
|
||||||
|
return -ENOSPC;
|
||||||
|
|
||||||
|
timeout = i915_request_wait(target,
|
||||||
|
I915_WAIT_INTERRUPTIBLE,
|
||||||
|
MAX_SCHEDULE_TIMEOUT);
|
||||||
|
if (timeout < 0)
|
||||||
|
return timeout;
|
||||||
|
|
||||||
|
i915_request_retire_upto(target);
|
||||||
|
|
||||||
|
intel_ring_update_space(ring);
|
||||||
|
GEM_BUG_ON(ring->space < bytes);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
u32 *intel_ring_begin(struct i915_request *rq, unsigned int num_dwords)
|
||||||
|
{
|
||||||
|
struct intel_ring *ring = rq->ring;
|
||||||
|
const unsigned int remain_usable = ring->effective_size - ring->emit;
|
||||||
|
const unsigned int bytes = num_dwords * sizeof(u32);
|
||||||
|
unsigned int need_wrap = 0;
|
||||||
|
unsigned int total_bytes;
|
||||||
|
u32 *cs;
|
||||||
|
|
||||||
|
/* Packets must be qword aligned. */
|
||||||
|
GEM_BUG_ON(num_dwords & 1);
|
||||||
|
|
||||||
|
total_bytes = bytes + rq->reserved_space;
|
||||||
|
GEM_BUG_ON(total_bytes > ring->effective_size);
|
||||||
|
|
||||||
|
if (unlikely(total_bytes > remain_usable)) {
|
||||||
|
const int remain_actual = ring->size - ring->emit;
|
||||||
|
|
||||||
|
if (bytes > remain_usable) {
|
||||||
|
/*
|
||||||
|
* Not enough space for the basic request. So need to
|
||||||
|
* flush out the remainder and then wait for
|
||||||
|
* base + reserved.
|
||||||
|
*/
|
||||||
|
total_bytes += remain_actual;
|
||||||
|
need_wrap = remain_actual | 1;
|
||||||
|
} else {
|
||||||
|
/*
|
||||||
|
* The base request will fit but the reserved space
|
||||||
|
* falls off the end. So we don't need an immediate
|
||||||
|
* wrap and only need to effectively wait for the
|
||||||
|
* reserved size from the start of ringbuffer.
|
||||||
|
*/
|
||||||
|
total_bytes = rq->reserved_space + remain_actual;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (unlikely(total_bytes > ring->space)) {
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Space is reserved in the ringbuffer for finalising the
|
||||||
|
* request, as that cannot be allowed to fail. During request
|
||||||
|
* finalisation, reserved_space is set to 0 to stop the
|
||||||
|
* overallocation and the assumption is that then we never need
|
||||||
|
* to wait (which has the risk of failing with EINTR).
|
||||||
|
*
|
||||||
|
* See also i915_request_alloc() and i915_request_add().
|
||||||
|
*/
|
||||||
|
GEM_BUG_ON(!rq->reserved_space);
|
||||||
|
|
||||||
|
ret = wait_for_space(ring,
|
||||||
|
i915_request_timeline(rq),
|
||||||
|
total_bytes);
|
||||||
|
if (unlikely(ret))
|
||||||
|
return ERR_PTR(ret);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (unlikely(need_wrap)) {
|
||||||
|
need_wrap &= ~1;
|
||||||
|
GEM_BUG_ON(need_wrap > ring->space);
|
||||||
|
GEM_BUG_ON(ring->emit + need_wrap > ring->size);
|
||||||
|
GEM_BUG_ON(!IS_ALIGNED(need_wrap, sizeof(u64)));
|
||||||
|
|
||||||
|
/* Fill the tail with MI_NOOP */
|
||||||
|
memset64(ring->vaddr + ring->emit, 0, need_wrap / sizeof(u64));
|
||||||
|
ring->space -= need_wrap;
|
||||||
|
ring->emit = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
GEM_BUG_ON(ring->emit > ring->size - bytes);
|
||||||
|
GEM_BUG_ON(ring->space < bytes);
|
||||||
|
cs = ring->vaddr + ring->emit;
|
||||||
|
GEM_DEBUG_EXEC(memset32(cs, POISON_INUSE, bytes / sizeof(*cs)));
|
||||||
|
ring->emit += bytes;
|
||||||
|
ring->space -= bytes;
|
||||||
|
|
||||||
|
return cs;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Align the ring tail to a cacheline boundary */
|
||||||
|
int intel_ring_cacheline_align(struct i915_request *rq)
|
||||||
|
{
|
||||||
|
int num_dwords;
|
||||||
|
void *cs;
|
||||||
|
|
||||||
|
num_dwords = (rq->ring->emit & (CACHELINE_BYTES - 1)) / sizeof(u32);
|
||||||
|
if (num_dwords == 0)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
num_dwords = CACHELINE_DWORDS - num_dwords;
|
||||||
|
GEM_BUG_ON(num_dwords & 1);
|
||||||
|
|
||||||
|
cs = intel_ring_begin(rq, num_dwords);
|
||||||
|
if (IS_ERR(cs))
|
||||||
|
return PTR_ERR(cs);
|
||||||
|
|
||||||
|
memset64(cs, (u64)MI_NOOP << 32 | MI_NOOP, num_dwords / 2);
|
||||||
|
intel_ring_advance(rq, cs + num_dwords);
|
||||||
|
|
||||||
|
GEM_BUG_ON(rq->ring->emit & (CACHELINE_BYTES - 1));
|
||||||
|
return 0;
|
||||||
|
}
|
|
@ -0,0 +1,131 @@
|
||||||
|
/*
|
||||||
|
* SPDX-License-Identifier: MIT
|
||||||
|
*
|
||||||
|
* Copyright © 2019 Intel Corporation
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef INTEL_RING_H
|
||||||
|
#define INTEL_RING_H
|
||||||
|
|
||||||
|
#include "i915_gem.h" /* GEM_BUG_ON */
|
||||||
|
#include "i915_request.h"
|
||||||
|
#include "intel_ring_types.h"
|
||||||
|
|
||||||
|
struct intel_engine_cs;
|
||||||
|
|
||||||
|
struct intel_ring *
|
||||||
|
intel_engine_create_ring(struct intel_engine_cs *engine, int size);
|
||||||
|
|
||||||
|
u32 *intel_ring_begin(struct i915_request *rq, unsigned int num_dwords);
|
||||||
|
int intel_ring_cacheline_align(struct i915_request *rq);
|
||||||
|
|
||||||
|
unsigned int intel_ring_update_space(struct intel_ring *ring);
|
||||||
|
|
||||||
|
int intel_ring_pin(struct intel_ring *ring);
|
||||||
|
void intel_ring_unpin(struct intel_ring *ring);
|
||||||
|
void intel_ring_reset(struct intel_ring *ring, u32 tail);
|
||||||
|
|
||||||
|
void intel_ring_free(struct kref *ref);
|
||||||
|
|
||||||
|
static inline struct intel_ring *intel_ring_get(struct intel_ring *ring)
|
||||||
|
{
|
||||||
|
kref_get(&ring->ref);
|
||||||
|
return ring;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline void intel_ring_put(struct intel_ring *ring)
|
||||||
|
{
|
||||||
|
kref_put(&ring->ref, intel_ring_free);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Marks the end of a command packet started with intel_ring_begin().
 *
 * It emits nothing: it exists so that a reader can pair it with the
 * preceding intel_ring_begin() and check that the number of dwords written
 * equals the number reserved. With GEM debugging enabled it also asserts
 * that @cs ended up exactly at the ring's current emit position.
 */
static inline void intel_ring_advance(struct i915_request *rq, u32 *cs)
{
	GEM_BUG_ON((rq->ring->vaddr + rq->ring->emit) != cs);
}
|
||||||
|
|
||||||
|
/* Fold @pos back into the ring by masking it with (ring->size - 1). */
static inline u32 intel_ring_wrap(const struct intel_ring *ring, u32 pos)
{
	const u32 mask = ring->size - 1;

	return pos & mask;
}
|
||||||
|
|
||||||
|
static inline bool
|
||||||
|
intel_ring_offset_valid(const struct intel_ring *ring,
|
||||||
|
unsigned int pos)
|
||||||
|
{
|
||||||
|
if (pos & -ring->size) /* must be strictly within the ring */
|
||||||
|
return false;
|
||||||
|
|
||||||
|
if (!IS_ALIGNED(pos, 8)) /* must be qword aligned */
|
||||||
|
return false;
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline u32 intel_ring_offset(const struct i915_request *rq, void *addr)
|
||||||
|
{
|
||||||
|
/* Don't write ring->size (equivalent to 0) as that hangs some GPUs. */
|
||||||
|
u32 offset = addr - rq->ring->vaddr;
|
||||||
|
GEM_BUG_ON(offset > rq->ring->size);
|
||||||
|
return intel_ring_wrap(rq->ring, offset);
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline void
|
||||||
|
assert_ring_tail_valid(const struct intel_ring *ring, unsigned int tail)
|
||||||
|
{
|
||||||
|
GEM_BUG_ON(!intel_ring_offset_valid(ring, tail));
|
||||||
|
|
||||||
|
/*
|
||||||
|
* "Ring Buffer Use"
|
||||||
|
* Gen2 BSpec "1. Programming Environment" / 1.4.4.6
|
||||||
|
* Gen3 BSpec "1c Memory Interface Functions" / 2.3.4.5
|
||||||
|
* Gen4+ BSpec "1c Memory Interface and Command Stream" / 5.3.4.5
|
||||||
|
* "If the Ring Buffer Head Pointer and the Tail Pointer are on the
|
||||||
|
* same cacheline, the Head Pointer must not be greater than the Tail
|
||||||
|
* Pointer."
|
||||||
|
*
|
||||||
|
* We use ring->head as the last known location of the actual RING_HEAD,
|
||||||
|
* it may have advanced but in the worst case it is equally the same
|
||||||
|
* as ring->head and so we should never program RING_TAIL to advance
|
||||||
|
* into the same cacheline as ring->head.
|
||||||
|
*/
|
||||||
|
#define cacheline(a) round_down(a, CACHELINE_BYTES)
|
||||||
|
GEM_BUG_ON(cacheline(tail) == cacheline(ring->head) &&
|
||||||
|
tail < ring->head);
|
||||||
|
#undef cacheline
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline unsigned int
|
||||||
|
intel_ring_set_tail(struct intel_ring *ring, unsigned int tail)
|
||||||
|
{
|
||||||
|
/* Whilst writes to the tail are strictly order, there is no
|
||||||
|
* serialisation between readers and the writers. The tail may be
|
||||||
|
* read by i915_request_retire() just as it is being updated
|
||||||
|
* by execlists, as although the breadcrumb is complete, the context
|
||||||
|
* switch hasn't been seen.
|
||||||
|
*/
|
||||||
|
assert_ring_tail_valid(ring, tail);
|
||||||
|
ring->tail = tail;
|
||||||
|
return tail;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline unsigned int
|
||||||
|
__intel_ring_space(unsigned int head, unsigned int tail, unsigned int size)
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
* "If the Ring Buffer Head Pointer and the Tail Pointer are on the
|
||||||
|
* same cacheline, the Head Pointer must not be greater than the Tail
|
||||||
|
* Pointer."
|
||||||
|
*/
|
||||||
|
GEM_BUG_ON(!is_power_of_2(size));
|
||||||
|
return (head - tail - CACHELINE_BYTES) & (size - 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif /* INTEL_RING_H */
|
|
@ -40,6 +40,7 @@
|
||||||
#include "intel_gt_irq.h"
|
#include "intel_gt_irq.h"
|
||||||
#include "intel_gt_pm_irq.h"
|
#include "intel_gt_pm_irq.h"
|
||||||
#include "intel_reset.h"
|
#include "intel_reset.h"
|
||||||
|
#include "intel_ring.h"
|
||||||
#include "intel_workarounds.h"
|
#include "intel_workarounds.h"
|
||||||
|
|
||||||
/* Rough estimate of the typical request size, performing a flush,
|
/* Rough estimate of the typical request size, performing a flush,
|
||||||
|
@ -47,16 +48,6 @@
|
||||||
*/
|
*/
|
||||||
#define LEGACY_REQUEST_SIZE 200
|
#define LEGACY_REQUEST_SIZE 200
|
||||||
|
|
||||||
unsigned int intel_ring_update_space(struct intel_ring *ring)
|
|
||||||
{
|
|
||||||
unsigned int space;
|
|
||||||
|
|
||||||
space = __intel_ring_space(ring->head, ring->emit, ring->size);
|
|
||||||
|
|
||||||
ring->space = space;
|
|
||||||
return space;
|
|
||||||
}
|
|
||||||
|
|
||||||
static int
|
static int
|
||||||
gen2_render_ring_flush(struct i915_request *rq, u32 mode)
|
gen2_render_ring_flush(struct i915_request *rq, u32 mode)
|
||||||
{
|
{
|
||||||
|
@ -1186,162 +1177,6 @@ i915_emit_bb_start(struct i915_request *rq,
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
int intel_ring_pin(struct intel_ring *ring)
|
|
||||||
{
|
|
||||||
struct i915_vma *vma = ring->vma;
|
|
||||||
unsigned int flags;
|
|
||||||
void *addr;
|
|
||||||
int ret;
|
|
||||||
|
|
||||||
if (atomic_fetch_inc(&ring->pin_count))
|
|
||||||
return 0;
|
|
||||||
|
|
||||||
flags = PIN_GLOBAL;
|
|
||||||
|
|
||||||
/* Ring wraparound at offset 0 sometimes hangs. No idea why. */
|
|
||||||
flags |= PIN_OFFSET_BIAS | i915_ggtt_pin_bias(vma);
|
|
||||||
|
|
||||||
if (vma->obj->stolen)
|
|
||||||
flags |= PIN_MAPPABLE;
|
|
||||||
else
|
|
||||||
flags |= PIN_HIGH;
|
|
||||||
|
|
||||||
ret = i915_vma_pin(vma, 0, 0, flags);
|
|
||||||
if (unlikely(ret))
|
|
||||||
goto err_unpin;
|
|
||||||
|
|
||||||
if (i915_vma_is_map_and_fenceable(vma))
|
|
||||||
addr = (void __force *)i915_vma_pin_iomap(vma);
|
|
||||||
else
|
|
||||||
addr = i915_gem_object_pin_map(vma->obj,
|
|
||||||
i915_coherent_map_type(vma->vm->i915));
|
|
||||||
if (IS_ERR(addr)) {
|
|
||||||
ret = PTR_ERR(addr);
|
|
||||||
goto err_ring;
|
|
||||||
}
|
|
||||||
|
|
||||||
i915_vma_make_unshrinkable(vma);
|
|
||||||
|
|
||||||
GEM_BUG_ON(ring->vaddr);
|
|
||||||
ring->vaddr = addr;
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
|
|
||||||
err_ring:
|
|
||||||
i915_vma_unpin(vma);
|
|
||||||
err_unpin:
|
|
||||||
atomic_dec(&ring->pin_count);
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
|
|
||||||
void intel_ring_reset(struct intel_ring *ring, u32 tail)
|
|
||||||
{
|
|
||||||
tail = intel_ring_wrap(ring, tail);
|
|
||||||
ring->tail = tail;
|
|
||||||
ring->head = tail;
|
|
||||||
ring->emit = tail;
|
|
||||||
intel_ring_update_space(ring);
|
|
||||||
}
|
|
||||||
|
|
||||||
void intel_ring_unpin(struct intel_ring *ring)
|
|
||||||
{
|
|
||||||
struct i915_vma *vma = ring->vma;
|
|
||||||
|
|
||||||
if (!atomic_dec_and_test(&ring->pin_count))
|
|
||||||
return;
|
|
||||||
|
|
||||||
/* Discard any unused bytes beyond that submitted to hw. */
|
|
||||||
intel_ring_reset(ring, ring->emit);
|
|
||||||
|
|
||||||
i915_vma_unset_ggtt_write(vma);
|
|
||||||
if (i915_vma_is_map_and_fenceable(vma))
|
|
||||||
i915_vma_unpin_iomap(vma);
|
|
||||||
else
|
|
||||||
i915_gem_object_unpin_map(vma->obj);
|
|
||||||
|
|
||||||
GEM_BUG_ON(!ring->vaddr);
|
|
||||||
ring->vaddr = NULL;
|
|
||||||
|
|
||||||
i915_vma_unpin(vma);
|
|
||||||
i915_vma_make_purgeable(vma);
|
|
||||||
}
|
|
||||||
|
|
||||||
static struct i915_vma *create_ring_vma(struct i915_ggtt *ggtt, int size)
|
|
||||||
{
|
|
||||||
struct i915_address_space *vm = &ggtt->vm;
|
|
||||||
struct drm_i915_private *i915 = vm->i915;
|
|
||||||
struct drm_i915_gem_object *obj;
|
|
||||||
struct i915_vma *vma;
|
|
||||||
|
|
||||||
obj = i915_gem_object_create_stolen(i915, size);
|
|
||||||
if (IS_ERR(obj))
|
|
||||||
obj = i915_gem_object_create_internal(i915, size);
|
|
||||||
if (IS_ERR(obj))
|
|
||||||
return ERR_CAST(obj);
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Mark ring buffers as read-only from GPU side (so no stray overwrites)
|
|
||||||
* if supported by the platform's GGTT.
|
|
||||||
*/
|
|
||||||
if (vm->has_read_only)
|
|
||||||
i915_gem_object_set_readonly(obj);
|
|
||||||
|
|
||||||
vma = i915_vma_instance(obj, vm, NULL);
|
|
||||||
if (IS_ERR(vma))
|
|
||||||
goto err;
|
|
||||||
|
|
||||||
return vma;
|
|
||||||
|
|
||||||
err:
|
|
||||||
i915_gem_object_put(obj);
|
|
||||||
return vma;
|
|
||||||
}
|
|
||||||
|
|
||||||
struct intel_ring *
|
|
||||||
intel_engine_create_ring(struct intel_engine_cs *engine, int size)
|
|
||||||
{
|
|
||||||
struct drm_i915_private *i915 = engine->i915;
|
|
||||||
struct intel_ring *ring;
|
|
||||||
struct i915_vma *vma;
|
|
||||||
|
|
||||||
GEM_BUG_ON(!is_power_of_2(size));
|
|
||||||
GEM_BUG_ON(RING_CTL_SIZE(size) & ~RING_NR_PAGES);
|
|
||||||
|
|
||||||
ring = kzalloc(sizeof(*ring), GFP_KERNEL);
|
|
||||||
if (!ring)
|
|
||||||
return ERR_PTR(-ENOMEM);
|
|
||||||
|
|
||||||
kref_init(&ring->ref);
|
|
||||||
|
|
||||||
ring->size = size;
|
|
||||||
/* Workaround an erratum on the i830 which causes a hang if
|
|
||||||
* the TAIL pointer points to within the last 2 cachelines
|
|
||||||
* of the buffer.
|
|
||||||
*/
|
|
||||||
ring->effective_size = size;
|
|
||||||
if (IS_I830(i915) || IS_I845G(i915))
|
|
||||||
ring->effective_size -= 2 * CACHELINE_BYTES;
|
|
||||||
|
|
||||||
intel_ring_update_space(ring);
|
|
||||||
|
|
||||||
vma = create_ring_vma(engine->gt->ggtt, size);
|
|
||||||
if (IS_ERR(vma)) {
|
|
||||||
kfree(ring);
|
|
||||||
return ERR_CAST(vma);
|
|
||||||
}
|
|
||||||
ring->vma = vma;
|
|
||||||
|
|
||||||
return ring;
|
|
||||||
}
|
|
||||||
|
|
||||||
void intel_ring_free(struct kref *ref)
|
|
||||||
{
|
|
||||||
struct intel_ring *ring = container_of(ref, typeof(*ring), ref);
|
|
||||||
|
|
||||||
i915_vma_put(ring->vma);
|
|
||||||
kfree(ring);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void __ring_context_fini(struct intel_context *ce)
|
static void __ring_context_fini(struct intel_context *ce)
|
||||||
{
|
{
|
||||||
i915_vma_put(ce->state);
|
i915_vma_put(ce->state);
|
||||||
|
@ -1836,148 +1671,6 @@ static int ring_request_alloc(struct i915_request *request)
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static noinline int
|
|
||||||
wait_for_space(struct intel_ring *ring,
|
|
||||||
struct intel_timeline *tl,
|
|
||||||
unsigned int bytes)
|
|
||||||
{
|
|
||||||
struct i915_request *target;
|
|
||||||
long timeout;
|
|
||||||
|
|
||||||
if (intel_ring_update_space(ring) >= bytes)
|
|
||||||
return 0;
|
|
||||||
|
|
||||||
GEM_BUG_ON(list_empty(&tl->requests));
|
|
||||||
list_for_each_entry(target, &tl->requests, link) {
|
|
||||||
if (target->ring != ring)
|
|
||||||
continue;
|
|
||||||
|
|
||||||
/* Would completion of this request free enough space? */
|
|
||||||
if (bytes <= __intel_ring_space(target->postfix,
|
|
||||||
ring->emit, ring->size))
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (GEM_WARN_ON(&target->link == &tl->requests))
|
|
||||||
return -ENOSPC;
|
|
||||||
|
|
||||||
timeout = i915_request_wait(target,
|
|
||||||
I915_WAIT_INTERRUPTIBLE,
|
|
||||||
MAX_SCHEDULE_TIMEOUT);
|
|
||||||
if (timeout < 0)
|
|
||||||
return timeout;
|
|
||||||
|
|
||||||
i915_request_retire_upto(target);
|
|
||||||
|
|
||||||
intel_ring_update_space(ring);
|
|
||||||
GEM_BUG_ON(ring->space < bytes);
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
u32 *intel_ring_begin(struct i915_request *rq, unsigned int num_dwords)
|
|
||||||
{
|
|
||||||
struct intel_ring *ring = rq->ring;
|
|
||||||
const unsigned int remain_usable = ring->effective_size - ring->emit;
|
|
||||||
const unsigned int bytes = num_dwords * sizeof(u32);
|
|
||||||
unsigned int need_wrap = 0;
|
|
||||||
unsigned int total_bytes;
|
|
||||||
u32 *cs;
|
|
||||||
|
|
||||||
/* Packets must be qword aligned. */
|
|
||||||
GEM_BUG_ON(num_dwords & 1);
|
|
||||||
|
|
||||||
total_bytes = bytes + rq->reserved_space;
|
|
||||||
GEM_BUG_ON(total_bytes > ring->effective_size);
|
|
||||||
|
|
||||||
if (unlikely(total_bytes > remain_usable)) {
|
|
||||||
const int remain_actual = ring->size - ring->emit;
|
|
||||||
|
|
||||||
if (bytes > remain_usable) {
|
|
||||||
/*
|
|
||||||
* Not enough space for the basic request. So need to
|
|
||||||
* flush out the remainder and then wait for
|
|
||||||
* base + reserved.
|
|
||||||
*/
|
|
||||||
total_bytes += remain_actual;
|
|
||||||
need_wrap = remain_actual | 1;
|
|
||||||
} else {
|
|
||||||
/*
|
|
||||||
* The base request will fit but the reserved space
|
|
||||||
* falls off the end. So we don't need an immediate
|
|
||||||
* wrap and only need to effectively wait for the
|
|
||||||
* reserved size from the start of ringbuffer.
|
|
||||||
*/
|
|
||||||
total_bytes = rq->reserved_space + remain_actual;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (unlikely(total_bytes > ring->space)) {
|
|
||||||
int ret;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Space is reserved in the ringbuffer for finalising the
|
|
||||||
* request, as that cannot be allowed to fail. During request
|
|
||||||
* finalisation, reserved_space is set to 0 to stop the
|
|
||||||
* overallocation and the assumption is that then we never need
|
|
||||||
* to wait (which has the risk of failing with EINTR).
|
|
||||||
*
|
|
||||||
* See also i915_request_alloc() and i915_request_add().
|
|
||||||
*/
|
|
||||||
GEM_BUG_ON(!rq->reserved_space);
|
|
||||||
|
|
||||||
ret = wait_for_space(ring,
|
|
||||||
i915_request_timeline(rq),
|
|
||||||
total_bytes);
|
|
||||||
if (unlikely(ret))
|
|
||||||
return ERR_PTR(ret);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (unlikely(need_wrap)) {
|
|
||||||
need_wrap &= ~1;
|
|
||||||
GEM_BUG_ON(need_wrap > ring->space);
|
|
||||||
GEM_BUG_ON(ring->emit + need_wrap > ring->size);
|
|
||||||
GEM_BUG_ON(!IS_ALIGNED(need_wrap, sizeof(u64)));
|
|
||||||
|
|
||||||
/* Fill the tail with MI_NOOP */
|
|
||||||
memset64(ring->vaddr + ring->emit, 0, need_wrap / sizeof(u64));
|
|
||||||
ring->space -= need_wrap;
|
|
||||||
ring->emit = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
GEM_BUG_ON(ring->emit > ring->size - bytes);
|
|
||||||
GEM_BUG_ON(ring->space < bytes);
|
|
||||||
cs = ring->vaddr + ring->emit;
|
|
||||||
GEM_DEBUG_EXEC(memset32(cs, POISON_INUSE, bytes / sizeof(*cs)));
|
|
||||||
ring->emit += bytes;
|
|
||||||
ring->space -= bytes;
|
|
||||||
|
|
||||||
return cs;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Align the ring tail to a cacheline boundary */
|
|
||||||
int intel_ring_cacheline_align(struct i915_request *rq)
|
|
||||||
{
|
|
||||||
int num_dwords;
|
|
||||||
void *cs;
|
|
||||||
|
|
||||||
num_dwords = (rq->ring->emit & (CACHELINE_BYTES - 1)) / sizeof(u32);
|
|
||||||
if (num_dwords == 0)
|
|
||||||
return 0;
|
|
||||||
|
|
||||||
num_dwords = CACHELINE_DWORDS - num_dwords;
|
|
||||||
GEM_BUG_ON(num_dwords & 1);
|
|
||||||
|
|
||||||
cs = intel_ring_begin(rq, num_dwords);
|
|
||||||
if (IS_ERR(cs))
|
|
||||||
return PTR_ERR(cs);
|
|
||||||
|
|
||||||
memset64(cs, (u64)MI_NOOP << 32 | MI_NOOP, num_dwords / 2);
|
|
||||||
intel_ring_advance(rq, cs);
|
|
||||||
|
|
||||||
GEM_BUG_ON(rq->ring->emit & (CACHELINE_BYTES - 1));
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
static void gen6_bsd_submit_request(struct i915_request *request)
|
static void gen6_bsd_submit_request(struct i915_request *request)
|
||||||
{
|
{
|
||||||
struct intel_uncore *uncore = request->engine->uncore;
|
struct intel_uncore *uncore = request->engine->uncore;
|
|
@ -0,0 +1,51 @@
|
||||||
|
/*
|
||||||
|
* SPDX-License-Identifier: MIT
|
||||||
|
*
|
||||||
|
* Copyright © 2019 Intel Corporation
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef INTEL_RING_TYPES_H
|
||||||
|
#define INTEL_RING_TYPES_H
|
||||||
|
|
||||||
|
#include <linux/atomic.h>
|
||||||
|
#include <linux/kref.h>
|
||||||
|
#include <linux/types.h>
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Early gen2 devices have a cacheline of just 32 bytes, using 64 is overkill,
|
||||||
|
* but keeps the logic simple. Indeed, the whole purpose of this macro is just
|
||||||
|
* to give some inclination as to some of the magic values used in the various
|
||||||
|
* workarounds!
|
||||||
|
*/
|
||||||
|
#define CACHELINE_BYTES 64
|
||||||
|
#define CACHELINE_DWORDS (CACHELINE_BYTES / sizeof(u32))
|
||||||
|
|
||||||
|
struct i915_vma;
|
||||||
|
|
||||||
|
struct intel_ring {
|
||||||
|
struct kref ref;
|
||||||
|
struct i915_vma *vma;
|
||||||
|
void *vaddr;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* As we have two types of rings, one global to the engine used
|
||||||
|
* by ringbuffer submission and those that are exclusive to a
|
||||||
|
* context used by execlists, we have to play safe and allow
|
||||||
|
* atomic updates to the pin_count. However, the actual pinning
|
||||||
|
* of the context is either done during initialisation for
|
||||||
|
* ringbuffer submission or serialised as part of the context
|
||||||
|
* pinning for execlists, and so we do not need a mutex ourselves
|
||||||
|
* to serialise intel_ring_pin/intel_ring_unpin.
|
||||||
|
*/
|
||||||
|
atomic_t pin_count;
|
||||||
|
|
||||||
|
u32 head;
|
||||||
|
u32 tail;
|
||||||
|
u32 emit;
|
||||||
|
|
||||||
|
u32 space;
|
||||||
|
u32 size;
|
||||||
|
u32 effective_size;
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif /* INTEL_RING_TYPES_H */
|
|
@ -4,13 +4,13 @@
|
||||||
* Copyright © 2016-2018 Intel Corporation
|
* Copyright © 2016-2018 Intel Corporation
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include "gt/intel_gt_types.h"
|
|
||||||
|
|
||||||
#include "i915_drv.h"
|
#include "i915_drv.h"
|
||||||
|
|
||||||
#include "i915_active.h"
|
#include "i915_active.h"
|
||||||
#include "i915_syncmap.h"
|
#include "i915_syncmap.h"
|
||||||
#include "gt/intel_timeline.h"
|
#include "intel_gt.h"
|
||||||
|
#include "intel_ring.h"
|
||||||
|
#include "intel_timeline.h"
|
||||||
|
|
||||||
#define ptr_set_bit(ptr, bit) ((typeof(ptr))((unsigned long)(ptr) | BIT(bit)))
|
#define ptr_set_bit(ptr, bit) ((typeof(ptr))((unsigned long)(ptr) | BIT(bit)))
|
||||||
#define ptr_test_bit(ptr, bit) ((unsigned long)(ptr) & BIT(bit))
|
#define ptr_test_bit(ptr, bit) ((unsigned long)(ptr) & BIT(bit))
|
||||||
|
|
|
@ -7,6 +7,7 @@
|
||||||
#include "i915_drv.h"
|
#include "i915_drv.h"
|
||||||
#include "intel_context.h"
|
#include "intel_context.h"
|
||||||
#include "intel_gt.h"
|
#include "intel_gt.h"
|
||||||
|
#include "intel_ring.h"
|
||||||
#include "intel_workarounds.h"
|
#include "intel_workarounds.h"
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
@ -23,6 +23,7 @@
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include "gem/i915_gem_context.h"
|
#include "gem/i915_gem_context.h"
|
||||||
|
#include "gt/intel_ring.h"
|
||||||
|
|
||||||
#include "i915_drv.h"
|
#include "i915_drv.h"
|
||||||
#include "intel_context.h"
|
#include "intel_context.h"
|
||||||
|
|
|
@ -9,6 +9,7 @@
|
||||||
#include "intel_engine_pm.h"
|
#include "intel_engine_pm.h"
|
||||||
#include "intel_gt.h"
|
#include "intel_gt.h"
|
||||||
#include "intel_gt_requests.h"
|
#include "intel_gt_requests.h"
|
||||||
|
#include "intel_ring.h"
|
||||||
|
|
||||||
#include "../selftests/i915_random.h"
|
#include "../selftests/i915_random.h"
|
||||||
#include "../i915_selftest.h"
|
#include "../i915_selftest.h"
|
||||||
|
|
|
@ -6,12 +6,13 @@
|
||||||
#include <linux/circ_buf.h>
|
#include <linux/circ_buf.h>
|
||||||
|
|
||||||
#include "gem/i915_gem_context.h"
|
#include "gem/i915_gem_context.h"
|
||||||
|
|
||||||
#include "gt/intel_context.h"
|
#include "gt/intel_context.h"
|
||||||
#include "gt/intel_engine_pm.h"
|
#include "gt/intel_engine_pm.h"
|
||||||
#include "gt/intel_gt.h"
|
#include "gt/intel_gt.h"
|
||||||
#include "gt/intel_gt_pm.h"
|
#include "gt/intel_gt_pm.h"
|
||||||
#include "gt/intel_lrc_reg.h"
|
#include "gt/intel_lrc_reg.h"
|
||||||
|
#include "gt/intel_ring.h"
|
||||||
|
|
||||||
#include "intel_guc_submission.h"
|
#include "intel_guc_submission.h"
|
||||||
|
|
||||||
#include "i915_drv.h"
|
#include "i915_drv.h"
|
||||||
|
|
|
@ -35,7 +35,9 @@
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include <linux/slab.h>
|
#include <linux/slab.h>
|
||||||
|
|
||||||
#include "i915_drv.h"
|
#include "i915_drv.h"
|
||||||
|
#include "gt/intel_ring.h"
|
||||||
#include "gvt.h"
|
#include "gvt.h"
|
||||||
#include "i915_pvinfo.h"
|
#include "i915_pvinfo.h"
|
||||||
#include "trace.h"
|
#include "trace.h"
|
||||||
|
|
|
@ -35,6 +35,7 @@
|
||||||
|
|
||||||
#include "i915_drv.h"
|
#include "i915_drv.h"
|
||||||
#include "gt/intel_context.h"
|
#include "gt/intel_context.h"
|
||||||
|
#include "gt/intel_ring.h"
|
||||||
#include "gvt.h"
|
#include "gvt.h"
|
||||||
#include "trace.h"
|
#include "trace.h"
|
||||||
|
|
||||||
|
|
|
@ -38,6 +38,7 @@
|
||||||
#include "gem/i915_gem_context.h"
|
#include "gem/i915_gem_context.h"
|
||||||
#include "gem/i915_gem_pm.h"
|
#include "gem/i915_gem_pm.h"
|
||||||
#include "gt/intel_context.h"
|
#include "gt/intel_context.h"
|
||||||
|
#include "gt/intel_ring.h"
|
||||||
|
|
||||||
#include "i915_drv.h"
|
#include "i915_drv.h"
|
||||||
#include "gvt.h"
|
#include "gvt.h"
|
||||||
|
|
|
@ -7,6 +7,7 @@
|
||||||
#include <linux/debugobjects.h>
|
#include <linux/debugobjects.h>
|
||||||
|
|
||||||
#include "gt/intel_engine_pm.h"
|
#include "gt/intel_engine_pm.h"
|
||||||
|
#include "gt/intel_ring.h"
|
||||||
|
|
||||||
#include "i915_drv.h"
|
#include "i915_drv.h"
|
||||||
#include "i915_active.h"
|
#include "i915_active.h"
|
||||||
|
|
|
@ -200,6 +200,7 @@
|
||||||
#include "gt/intel_engine_user.h"
|
#include "gt/intel_engine_user.h"
|
||||||
#include "gt/intel_gt.h"
|
#include "gt/intel_gt.h"
|
||||||
#include "gt/intel_lrc_reg.h"
|
#include "gt/intel_lrc_reg.h"
|
||||||
|
#include "gt/intel_ring.h"
|
||||||
|
|
||||||
#include "i915_drv.h"
|
#include "i915_drv.h"
|
||||||
#include "i915_perf.h"
|
#include "i915_perf.h"
|
||||||
|
|
|
@ -31,6 +31,7 @@
|
||||||
|
|
||||||
#include "gem/i915_gem_context.h"
|
#include "gem/i915_gem_context.h"
|
||||||
#include "gt/intel_context.h"
|
#include "gt/intel_context.h"
|
||||||
|
#include "gt/intel_ring.h"
|
||||||
|
|
||||||
#include "i915_active.h"
|
#include "i915_active.h"
|
||||||
#include "i915_drv.h"
|
#include "i915_drv.h"
|
||||||
|
|
Loading…
Reference in New Issue