2010-11-06 05:23:30 +08:00
|
|
|
/*
|
|
|
|
* Copyright © 2010 Daniel Vetter
|
2014-02-20 14:05:47 +08:00
|
|
|
* Copyright © 2011-2014 Intel Corporation
|
2010-11-06 05:23:30 +08:00
|
|
|
*
|
|
|
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
|
|
|
* copy of this software and associated documentation files (the "Software"),
|
|
|
|
* to deal in the Software without restriction, including without limitation
|
|
|
|
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
|
|
* and/or sell copies of the Software, and to permit persons to whom the
|
|
|
|
* Software is furnished to do so, subject to the following conditions:
|
|
|
|
*
|
|
|
|
* The above copyright notice and this permission notice (including the next
|
|
|
|
* paragraph) shall be included in all copies or substantial portions of the
|
|
|
|
* Software.
|
|
|
|
*
|
|
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
|
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
|
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
|
|
|
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
|
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
|
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
|
|
|
* IN THE SOFTWARE.
|
|
|
|
*
|
|
|
|
*/
|
|
|
|
|
2014-01-08 23:10:27 +08:00
|
|
|
#include <linux/seq_file.h>
|
2012-10-03 01:01:07 +08:00
|
|
|
#include <drm/drmP.h>
|
|
|
|
#include <drm/i915_drm.h>
|
2010-11-06 05:23:30 +08:00
|
|
|
#include "i915_drv.h"
|
2015-02-10 19:05:48 +08:00
|
|
|
#include "i915_vgpu.h"
|
2010-11-06 05:23:30 +08:00
|
|
|
#include "i915_trace.h"
|
|
|
|
#include "intel_drv.h"
|
|
|
|
|
2014-12-11 01:27:59 +08:00
|
|
|
/**
|
|
|
|
* DOC: Global GTT views
|
|
|
|
*
|
|
|
|
* Background and previous state
|
|
|
|
*
|
|
|
|
* Historically objects could exist (be bound) in global GTT space only as
|
|
|
|
* singular instances with a view representing all of the object's backing pages
|
|
|
|
* in a linear fashion. This view will be called a normal view.
|
|
|
|
*
|
|
|
|
* To support multiple views of the same object, where the number of mapped
|
|
|
|
* pages is not equal to the backing store, or where the layout of the pages
|
|
|
|
* is not linear, the concept of a GGTT view was added.
|
|
|
|
*
|
|
|
|
* One example of an alternative view is a stereo display driven by a single
|
|
|
|
* image. In this case we would have a framebuffer looking like this
|
|
|
|
* (2x2 pages):
|
|
|
|
*
|
|
|
|
* 12
|
|
|
|
* 34
|
|
|
|
*
|
|
|
|
* Above would represent a normal GGTT view as normally mapped for GPU or CPU
|
|
|
|
* rendering. In contrast, fed to the display engine would be an alternative
|
|
|
|
* view which could look something like this:
|
|
|
|
*
|
|
|
|
* 1212
|
|
|
|
* 3434
|
|
|
|
*
|
|
|
|
* In this example both the size and layout of pages in the alternative view is
|
|
|
|
* different from the normal view.
|
|
|
|
*
|
|
|
|
* Implementation and usage
|
|
|
|
*
|
|
|
|
* GGTT views are implemented using VMAs and are distinguished via enum
|
|
|
|
* i915_ggtt_view_type and struct i915_ggtt_view.
|
|
|
|
*
|
|
|
|
* A new flavour of core GEM functions which work with GGTT bound objects were
|
2015-03-16 20:11:13 +08:00
|
|
|
* added with the _ggtt_ infix, and sometimes with _view postfix to avoid
|
|
|
|
* renaming in large amounts of code. They take the struct i915_ggtt_view
|
|
|
|
* parameter encapsulating all metadata required to implement a view.
|
2014-12-11 01:27:59 +08:00
|
|
|
*
|
|
|
|
* As a helper for callers which are only interested in the normal view,
|
|
|
|
* globally const i915_ggtt_view_normal singleton instance exists. All old core
|
|
|
|
* GEM API functions, the ones not taking the view parameter, are operating on,
|
|
|
|
* or with the normal GGTT view.
|
|
|
|
*
|
|
|
|
* Code wanting to add or use a new GGTT view needs to:
|
|
|
|
*
|
|
|
|
* 1. Add a new enum with a suitable name.
|
|
|
|
* 2. Extend the metadata in the i915_ggtt_view structure if required.
|
|
|
|
* 3. Add support to i915_get_vma_pages().
|
|
|
|
*
|
|
|
|
* New views are required to build a scatter-gather table from within the
|
|
|
|
* i915_get_vma_pages function. This table is stored in the vma.ggtt_view and
|
|
|
|
* exists for the lifetime of a VMA.
|
|
|
|
*
|
|
|
|
* Core API is designed to have copy semantics which means that passed in
|
|
|
|
* struct i915_ggtt_view does not need to be persistent (left around after
|
|
|
|
* calling the core API functions).
|
|
|
|
*
|
|
|
|
*/
|
|
|
|
|
2015-04-14 23:35:27 +08:00
|
|
|
static int
|
|
|
|
i915_get_ggtt_vma_pages(struct i915_vma *vma);
|
|
|
|
|
2014-12-11 01:27:58 +08:00
|
|
|
const struct i915_ggtt_view i915_ggtt_view_normal;
|
2015-03-27 19:09:22 +08:00
|
|
|
const struct i915_ggtt_view i915_ggtt_view_rotated = {
|
|
|
|
.type = I915_GGTT_VIEW_ROTATED
|
|
|
|
};
|
2014-12-11 01:27:58 +08:00
|
|
|
|
2014-04-29 17:53:58 +08:00
|
|
|
static int sanitize_enable_ppgtt(struct drm_device *dev, int enable_ppgtt)
|
|
|
|
{
|
2014-09-19 18:56:27 +08:00
|
|
|
bool has_aliasing_ppgtt;
|
|
|
|
bool has_full_ppgtt;
|
|
|
|
|
|
|
|
has_aliasing_ppgtt = INTEL_INFO(dev)->gen >= 6;
|
|
|
|
has_full_ppgtt = INTEL_INFO(dev)->gen >= 7;
|
|
|
|
|
2015-02-10 19:05:54 +08:00
|
|
|
if (intel_vgpu_active(dev))
|
|
|
|
has_full_ppgtt = false; /* emulation is too hard */
|
|
|
|
|
2014-11-14 23:05:59 +08:00
|
|
|
/*
|
|
|
|
* We don't allow disabling PPGTT for gen9+ as it's a requirement for
|
|
|
|
* execlists, the sole mechanism available to submit work.
|
|
|
|
*/
|
|
|
|
if (INTEL_INFO(dev)->gen < 9 &&
|
|
|
|
(enable_ppgtt == 0 || !has_aliasing_ppgtt))
|
2014-04-29 17:53:58 +08:00
|
|
|
return 0;
|
|
|
|
|
|
|
|
if (enable_ppgtt == 1)
|
|
|
|
return 1;
|
|
|
|
|
2014-09-19 18:56:27 +08:00
|
|
|
if (enable_ppgtt == 2 && has_full_ppgtt)
|
2014-04-29 17:53:58 +08:00
|
|
|
return 2;
|
|
|
|
|
drm/i915: Disable full ppgtt by default
There are too many oustanding issues:
- Fence handling in the current code is broken. There's a patch series
from me, but it's blocked on and extended review (which includes
writing the testcases).
- IOMMU mapping handling is broken, we need to properly refcount it -
currently it gets destroyed when the first vma is unbound, so way
too early.
- There's a pending reset issue on snb. Since Mika's reset work and
full ppgtt have been pulled in in separate branches and ended up
intermittingly breaking each another it's unclear who's the exact
culprit here.
- We still have persistent evidince of crazy recursion bugs through
vma_unbind and ppgtt_relase, e.g.
https://bugs.freedesktop.org/show_bug.cgi?id=73383
This issue (and a few others meanwhile resolved) have blocked our
performance measuring/tuning group since 3 months.
- Secure batch dispatching is broken. This is blocking Brad Volkin's
command checker work since 3 months.
All these issues are confirmed to only happen when full ppgtt is
enabled, falling back to aliasing ppgtt resolves them. But even
aliasing ppgtt itself still has a regression:
- We currently unconditionally bind objects into the aliasing ppgtt,
which means all priviledged objects like ringbuffers are visible to
unpriviledged access again. On top of that this also breaks the
command checker for aliasing ppgtt, since it can't hide the
validated batch any more.
Furthermore topic/full-ppgtt has never been reviewed:
- Lifetime rules around vma unbinding/release are unclear, resulting
into this awesome hack called ppgtt_release. Which seems to take the
blame for most of the recursion fallout.
- Context/ring init works different on gpu reset than anywhere else.
Such differeneces have in the past always lead to really hard to
track down bugs.
- Aliasing ppgtt is treated in a bunch of places as a real address
space, but it isn't - the real address space is always the global
gtt in that case. This results in a bit a mess between contexts and
ppgtt object, further complication the context/ppgtt/vma lifetime
rules.
- We don't have any docs describing the overall concepts introduced
with full ppgtt. A short, concise overview describing vmas and some
of the strange bits around them (like the unbound vmas used by
execbuf, or the new binding rules) really is needed.
Note that a lot of the post topic/full-ppgtt merge fallout has already
been addressed, this entire list here of 10 issues really only contains
the still outstanding issues.
Finally the 3.15 merge window is approaching and I think we need to
use the remaining time to ensure that our fallback option of using
aliasing ppgtt is in solid shape. Hence I think it's time to throw the
switch. While at it demote the helper from static inline status
because really.
Cc: Ben Widawsky <ben@bwidawsk.net>
Cc: Dave Airlie <airlied@gmail.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2014-03-06 16:40:43 +08:00
|
|
|
#ifdef CONFIG_INTEL_IOMMU
|
|
|
|
/* Disable ppgtt on SNB if VT-d is on. */
|
|
|
|
if (INTEL_INFO(dev)->gen == 6 && intel_iommu_gfx_mapped) {
|
|
|
|
DRM_INFO("Disabling PPGTT because VT-d is on\n");
|
2014-04-29 17:53:58 +08:00
|
|
|
return 0;
|
drm/i915: Disable full ppgtt by default
There are too many oustanding issues:
- Fence handling in the current code is broken. There's a patch series
from me, but it's blocked on and extended review (which includes
writing the testcases).
- IOMMU mapping handling is broken, we need to properly refcount it -
currently it gets destroyed when the first vma is unbound, so way
too early.
- There's a pending reset issue on snb. Since Mika's reset work and
full ppgtt have been pulled in in separate branches and ended up
intermittingly breaking each another it's unclear who's the exact
culprit here.
- We still have persistent evidince of crazy recursion bugs through
vma_unbind and ppgtt_relase, e.g.
https://bugs.freedesktop.org/show_bug.cgi?id=73383
This issue (and a few others meanwhile resolved) have blocked our
performance measuring/tuning group since 3 months.
- Secure batch dispatching is broken. This is blocking Brad Volkin's
command checker work since 3 months.
All these issues are confirmed to only happen when full ppgtt is
enabled, falling back to aliasing ppgtt resolves them. But even
aliasing ppgtt itself still has a regression:
- We currently unconditionally bind objects into the aliasing ppgtt,
which means all priviledged objects like ringbuffers are visible to
unpriviledged access again. On top of that this also breaks the
command checker for aliasing ppgtt, since it can't hide the
validated batch any more.
Furthermore topic/full-ppgtt has never been reviewed:
- Lifetime rules around vma unbinding/release are unclear, resulting
into this awesome hack called ppgtt_release. Which seems to take the
blame for most of the recursion fallout.
- Context/ring init works different on gpu reset than anywhere else.
Such differeneces have in the past always lead to really hard to
track down bugs.
- Aliasing ppgtt is treated in a bunch of places as a real address
space, but it isn't - the real address space is always the global
gtt in that case. This results in a bit a mess between contexts and
ppgtt object, further complication the context/ppgtt/vma lifetime
rules.
- We don't have any docs describing the overall concepts introduced
with full ppgtt. A short, concise overview describing vmas and some
of the strange bits around them (like the unbound vmas used by
execbuf, or the new binding rules) really is needed.
Note that a lot of the post topic/full-ppgtt merge fallout has already
been addressed, this entire list here of 10 issues really only contains
the still outstanding issues.
Finally the 3.15 merge window is approaching and I think we need to
use the remaining time to ensure that our fallback option of using
aliasing ppgtt is in solid shape. Hence I think it's time to throw the
switch. While at it demote the helper from static inline status
because really.
Cc: Ben Widawsky <ben@bwidawsk.net>
Cc: Dave Airlie <airlied@gmail.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2014-03-06 16:40:43 +08:00
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
2014-06-14 00:28:33 +08:00
|
|
|
/* Early VLV doesn't have this */
|
2014-06-28 07:03:56 +08:00
|
|
|
if (IS_VALLEYVIEW(dev) && !IS_CHERRYVIEW(dev) &&
|
|
|
|
dev->pdev->revision < 0xb) {
|
2014-06-14 00:28:33 +08:00
|
|
|
DRM_DEBUG_DRIVER("disabling PPGTT on pre-B3 step VLV\n");
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2014-12-15 22:58:00 +08:00
|
|
|
if (INTEL_INFO(dev)->gen >= 8 && i915.enable_execlists)
|
|
|
|
return 2;
|
|
|
|
else
|
|
|
|
return has_aliasing_ppgtt ? 1 : 0;
|
drm/i915: Disable full ppgtt by default
There are too many oustanding issues:
- Fence handling in the current code is broken. There's a patch series
from me, but it's blocked on and extended review (which includes
writing the testcases).
- IOMMU mapping handling is broken, we need to properly refcount it -
currently it gets destroyed when the first vma is unbound, so way
too early.
- There's a pending reset issue on snb. Since Mika's reset work and
full ppgtt have been pulled in in separate branches and ended up
intermittingly breaking each another it's unclear who's the exact
culprit here.
- We still have persistent evidince of crazy recursion bugs through
vma_unbind and ppgtt_relase, e.g.
https://bugs.freedesktop.org/show_bug.cgi?id=73383
This issue (and a few others meanwhile resolved) have blocked our
performance measuring/tuning group since 3 months.
- Secure batch dispatching is broken. This is blocking Brad Volkin's
command checker work since 3 months.
All these issues are confirmed to only happen when full ppgtt is
enabled, falling back to aliasing ppgtt resolves them. But even
aliasing ppgtt itself still has a regression:
- We currently unconditionally bind objects into the aliasing ppgtt,
which means all priviledged objects like ringbuffers are visible to
unpriviledged access again. On top of that this also breaks the
command checker for aliasing ppgtt, since it can't hide the
validated batch any more.
Furthermore topic/full-ppgtt has never been reviewed:
- Lifetime rules around vma unbinding/release are unclear, resulting
into this awesome hack called ppgtt_release. Which seems to take the
blame for most of the recursion fallout.
- Context/ring init works different on gpu reset than anywhere else.
Such differeneces have in the past always lead to really hard to
track down bugs.
- Aliasing ppgtt is treated in a bunch of places as a real address
space, but it isn't - the real address space is always the global
gtt in that case. This results in a bit a mess between contexts and
ppgtt object, further complication the context/ppgtt/vma lifetime
rules.
- We don't have any docs describing the overall concepts introduced
with full ppgtt. A short, concise overview describing vmas and some
of the strange bits around them (like the unbound vmas used by
execbuf, or the new binding rules) really is needed.
Note that a lot of the post topic/full-ppgtt merge fallout has already
been addressed, this entire list here of 10 issues really only contains
the still outstanding issues.
Finally the 3.15 merge window is approaching and I think we need to
use the remaining time to ensure that our fallback option of using
aliasing ppgtt is in solid shape. Hence I think it's time to throw the
switch. While at it demote the helper from static inline status
because really.
Cc: Ben Widawsky <ben@bwidawsk.net>
Cc: Dave Airlie <airlied@gmail.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2014-03-06 16:40:43 +08:00
|
|
|
}
|
|
|
|
|
2015-04-14 23:35:27 +08:00
|
|
|
static int ppgtt_bind_vma(struct i915_vma *vma,
|
|
|
|
enum i915_cache_level cache_level,
|
|
|
|
u32 unused)
|
2015-04-14 23:35:24 +08:00
|
|
|
{
|
|
|
|
u32 pte_flags = 0;
|
|
|
|
|
|
|
|
/* Currently applicable only to VLV */
|
|
|
|
if (vma->obj->gt_ro)
|
|
|
|
pte_flags |= PTE_READ_ONLY;
|
|
|
|
|
|
|
|
vma->vm->insert_entries(vma->vm, vma->obj->pages, vma->node.start,
|
|
|
|
cache_level, pte_flags);
|
2015-04-14 23:35:27 +08:00
|
|
|
|
|
|
|
return 0;
|
2015-04-14 23:35:24 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
static void ppgtt_unbind_vma(struct i915_vma *vma)
|
|
|
|
{
|
|
|
|
vma->vm->clear_range(vma->vm,
|
|
|
|
vma->node.start,
|
|
|
|
vma->obj->base.size,
|
|
|
|
true);
|
|
|
|
}
|
drm/i915: Create bind/unbind abstraction for VMAs
To sum up what goes on here, we abstract the vma binding, similarly to
the previous object binding. This helps for distinguishing legacy
binding, versus modern binding. To keep the code churn as minimal as
possible, I am leaving in insert_entries(). It serves as the per
platform pte writing basically. bind_vma and insert_entries do share a
lot of similarities, and I did have designs to combine the two, but as
mentioned already... too much churn in an already massive patchset.
What follows are the 3 commits which existed discretely in the original
submissions. Upon rebasing on Broadwell support, it became clear that
separation was not good, and only made for more error prone code. Below
are the 3 commit messages with all their history.
drm/i915: Add bind/unbind object functions to VMA
drm/i915: Use the new vm [un]bind functions
drm/i915: reduce vm->insert_entries() usage
drm/i915: Add bind/unbind object functions to VMA
As we plumb the code with more VM information, it has become more
obvious that the easiest way to deal with bind and unbind is to simply
put the function pointers in the vm, and let those choose the correct
way to handle the page table updates. This change allows many places in
the code to simply be vm->bind, and not have to worry about
distinguishing PPGTT vs GGTT.
Notice that this patch has no impact on functionality. I've decided to
save the actual change until the next patch because I think it's easier
to review that way. I'm happy to squash the two, or let Daniel do it on
merge.
v2:
Make ggtt handle the quirky aliasing ppgtt
Add flags to bind object to support above
Don't ever call bind/unbind directly for PPGTT until we have real, full
PPGTT (use NULLs to assert this)
Make sure we rebind the ggtt if there already is a ggtt binding. This
happens on set cache levels.
Use VMA for bind/unbind (Daniel, Ben)
v3: Reorganize ggtt_vma_bind to be more concise and easier to read
(Ville). Change logic in unbind to only unbind ggtt when there is a
global mapping, and to remove a redundant check if the aliasing ppgtt
exists.
v4: Make the bind function a bit smarter about the cache levels to avoid
unnecessary multiple remaps. "I accept it is a wart, I think unifying
the pin_vma / bind_vma could be unified later" (Chris)
Removed the git notes, and put version info here. (Daniel)
v5: Update the comment to not suck (Chris)
v6:
Move bind/unbind to the VMA. It makes more sense in the VMA structure
(always has, but I was previously lazy). With this change, it will allow
us to keep a distinct insert_entries.
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
drm/i915: Use the new vm [un]bind functions
Building on the last patch which created the new function pointers in
the VM for bind/unbind, here we actually put those new function pointers
to use.
Split out as a separate patch to aid in review. I'm fine with squashing
into the previous patch if people request it.
v2: Updated to address the smart ggtt which can do aliasing as needed
Make sure we bind to global gtt when mappable and fenceable. I thought
we could get away without this initialy, but we cannot.
v3: Make the global GTT binding explicitly use the ggtt VM for
bind_vma(). While at it, use the new ggtt_vma helper (Chris)
At this point the original mailing list thread diverges. ie.
v4^:
use target_obj instead of obj for gen6 relocate_entry
vma->bind_vma() can be called safely during pin. So simply do that
instead of the complicated conditionals.
Don't restore PPGTT bound objects on resume path
Bug fix in resume path for globally bound Bos
Properly handle secure dispatch
Rebased on vma bind/unbind conversion
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
drm/i915: reduce vm->insert_entries() usage
FKA: drm/i915: eliminate vm->insert_entries()
With bind/unbind function pointers in place, we no longer need
insert_entries. We could, and want, to remove clear_range, however it's
not totally easy at this point. Since it's used in a couple of place
still that don't only deal in objects: setup, ppgtt init, and restore
gtt mappings.
v2: Don't actually remove insert_entries, just limit its usage. It will
be useful when we introduce gen8. It will always be called from the vma
bind/unbind.
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> (v1)
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2013-12-07 06:10:56 +08:00
|
|
|
|
2015-04-14 23:35:26 +08:00
|
|
|
static gen8_pte_t gen8_pte_encode(dma_addr_t addr,
|
|
|
|
enum i915_cache_level level,
|
|
|
|
bool valid)
|
2013-11-03 12:07:18 +08:00
|
|
|
{
|
2015-03-17 00:00:54 +08:00
|
|
|
gen8_pte_t pte = valid ? _PAGE_PRESENT | _PAGE_RW : 0;
|
2013-11-03 12:07:18 +08:00
|
|
|
pte |= addr;
|
2014-04-19 05:04:27 +08:00
|
|
|
|
|
|
|
switch (level) {
|
|
|
|
case I915_CACHE_NONE:
|
2013-11-05 11:56:49 +08:00
|
|
|
pte |= PPAT_UNCACHED_INDEX;
|
2014-04-19 05:04:27 +08:00
|
|
|
break;
|
|
|
|
case I915_CACHE_WT:
|
|
|
|
pte |= PPAT_DISPLAY_ELLC_INDEX;
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
pte |= PPAT_CACHED_INDEX;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
2013-11-03 12:07:18 +08:00
|
|
|
return pte;
|
|
|
|
}
|
|
|
|
|
2015-04-14 23:35:26 +08:00
|
|
|
static gen8_pde_t gen8_pde_encode(struct drm_device *dev,
|
|
|
|
dma_addr_t addr,
|
|
|
|
enum i915_cache_level level)
|
2013-11-05 13:20:14 +08:00
|
|
|
{
|
2015-03-17 00:00:54 +08:00
|
|
|
gen8_pde_t pde = _PAGE_PRESENT | _PAGE_RW;
|
2013-11-05 13:20:14 +08:00
|
|
|
pde |= addr;
|
|
|
|
if (level != I915_CACHE_NONE)
|
|
|
|
pde |= PPAT_CACHED_PDE_INDEX;
|
|
|
|
else
|
|
|
|
pde |= PPAT_UNCACHED_INDEX;
|
|
|
|
return pde;
|
|
|
|
}
|
|
|
|
|
2015-03-17 00:00:54 +08:00
|
|
|
static gen6_pte_t snb_pte_encode(dma_addr_t addr,
|
|
|
|
enum i915_cache_level level,
|
|
|
|
bool valid, u32 unused)
|
2012-09-25 07:44:32 +08:00
|
|
|
{
|
2015-03-17 00:00:54 +08:00
|
|
|
gen6_pte_t pte = valid ? GEN6_PTE_VALID : 0;
|
2012-09-25 07:44:32 +08:00
|
|
|
pte |= GEN6_PTE_ADDR_ENCODE(addr);
|
2012-10-20 00:33:22 +08:00
|
|
|
|
|
|
|
switch (level) {
|
2013-08-06 20:17:02 +08:00
|
|
|
case I915_CACHE_L3_LLC:
|
|
|
|
case I915_CACHE_LLC:
|
|
|
|
pte |= GEN6_PTE_CACHE_LLC;
|
|
|
|
break;
|
|
|
|
case I915_CACHE_NONE:
|
|
|
|
pte |= GEN6_PTE_UNCACHED;
|
|
|
|
break;
|
|
|
|
default:
|
2014-12-08 23:40:10 +08:00
|
|
|
MISSING_CASE(level);
|
2013-08-06 20:17:02 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
return pte;
|
|
|
|
}
|
|
|
|
|
2015-03-17 00:00:54 +08:00
|
|
|
static gen6_pte_t ivb_pte_encode(dma_addr_t addr,
|
|
|
|
enum i915_cache_level level,
|
|
|
|
bool valid, u32 unused)
|
2013-08-06 20:17:02 +08:00
|
|
|
{
|
2015-03-17 00:00:54 +08:00
|
|
|
gen6_pte_t pte = valid ? GEN6_PTE_VALID : 0;
|
2013-08-06 20:17:02 +08:00
|
|
|
pte |= GEN6_PTE_ADDR_ENCODE(addr);
|
|
|
|
|
|
|
|
switch (level) {
|
|
|
|
case I915_CACHE_L3_LLC:
|
|
|
|
pte |= GEN7_PTE_CACHE_L3_LLC;
|
2012-10-20 00:33:22 +08:00
|
|
|
break;
|
|
|
|
case I915_CACHE_LLC:
|
|
|
|
pte |= GEN6_PTE_CACHE_LLC;
|
|
|
|
break;
|
|
|
|
case I915_CACHE_NONE:
|
2013-04-22 15:53:51 +08:00
|
|
|
pte |= GEN6_PTE_UNCACHED;
|
2012-10-20 00:33:22 +08:00
|
|
|
break;
|
|
|
|
default:
|
2014-12-08 23:40:10 +08:00
|
|
|
MISSING_CASE(level);
|
2012-10-20 00:33:22 +08:00
|
|
|
}
|
|
|
|
|
2012-09-25 07:44:32 +08:00
|
|
|
return pte;
|
|
|
|
}
|
|
|
|
|
2015-03-17 00:00:54 +08:00
|
|
|
static gen6_pte_t byt_pte_encode(dma_addr_t addr,
|
|
|
|
enum i915_cache_level level,
|
|
|
|
bool valid, u32 flags)
|
2013-04-22 15:53:50 +08:00
|
|
|
{
|
2015-03-17 00:00:54 +08:00
|
|
|
gen6_pte_t pte = valid ? GEN6_PTE_VALID : 0;
|
2013-04-22 15:53:50 +08:00
|
|
|
pte |= GEN6_PTE_ADDR_ENCODE(addr);
|
|
|
|
|
2014-06-17 13:29:42 +08:00
|
|
|
if (!(flags & PTE_READ_ONLY))
|
|
|
|
pte |= BYT_PTE_WRITEABLE;
|
2013-04-22 15:53:50 +08:00
|
|
|
|
|
|
|
if (level != I915_CACHE_NONE)
|
|
|
|
pte |= BYT_PTE_SNOOPED_BY_CPU_CACHES;
|
|
|
|
|
|
|
|
return pte;
|
|
|
|
}
|
|
|
|
|
2015-03-17 00:00:54 +08:00
|
|
|
static gen6_pte_t hsw_pte_encode(dma_addr_t addr,
|
|
|
|
enum i915_cache_level level,
|
|
|
|
bool valid, u32 unused)
|
2013-04-22 15:53:51 +08:00
|
|
|
{
|
2015-03-17 00:00:54 +08:00
|
|
|
gen6_pte_t pte = valid ? GEN6_PTE_VALID : 0;
|
2013-07-05 02:02:03 +08:00
|
|
|
pte |= HSW_PTE_ADDR_ENCODE(addr);
|
2013-04-22 15:53:51 +08:00
|
|
|
|
|
|
|
if (level != I915_CACHE_NONE)
|
2013-08-05 14:47:29 +08:00
|
|
|
pte |= HSW_WB_LLC_AGE3;
|
2013-04-22 15:53:51 +08:00
|
|
|
|
|
|
|
return pte;
|
|
|
|
}
|
|
|
|
|
2015-03-17 00:00:54 +08:00
|
|
|
static gen6_pte_t iris_pte_encode(dma_addr_t addr,
|
|
|
|
enum i915_cache_level level,
|
|
|
|
bool valid, u32 unused)
|
2013-07-05 02:02:06 +08:00
|
|
|
{
|
2015-03-17 00:00:54 +08:00
|
|
|
gen6_pte_t pte = valid ? GEN6_PTE_VALID : 0;
|
2013-07-05 02:02:06 +08:00
|
|
|
pte |= HSW_PTE_ADDR_ENCODE(addr);
|
|
|
|
|
2013-08-08 21:41:10 +08:00
|
|
|
switch (level) {
|
|
|
|
case I915_CACHE_NONE:
|
|
|
|
break;
|
|
|
|
case I915_CACHE_WT:
|
2013-11-22 18:37:53 +08:00
|
|
|
pte |= HSW_WT_ELLC_LLC_AGE3;
|
2013-08-08 21:41:10 +08:00
|
|
|
break;
|
|
|
|
default:
|
2013-11-22 18:37:53 +08:00
|
|
|
pte |= HSW_WB_ELLC_LLC_AGE3;
|
2013-08-08 21:41:10 +08:00
|
|
|
break;
|
|
|
|
}
|
2013-07-05 02:02:06 +08:00
|
|
|
|
|
|
|
return pte;
|
|
|
|
}
|
|
|
|
|
2015-03-17 00:00:56 +08:00
|
|
|
#define i915_dma_unmap_single(px, dev) \
|
|
|
|
__i915_dma_unmap_single((px)->daddr, dev)
|
|
|
|
|
2015-04-14 23:35:26 +08:00
|
|
|
static void __i915_dma_unmap_single(dma_addr_t daddr,
|
|
|
|
struct drm_device *dev)
|
2015-03-17 00:00:56 +08:00
|
|
|
{
|
|
|
|
struct device *device = &dev->pdev->dev;
|
|
|
|
|
|
|
|
dma_unmap_page(device, daddr, 4096, PCI_DMA_BIDIRECTIONAL);
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* i915_dma_map_single() - Create a dma mapping for a page table/dir/etc.
|
|
|
|
* @px: Page table/dir/etc to get a DMA map for
|
|
|
|
* @dev: drm device
|
|
|
|
*
|
|
|
|
* Page table allocations are unified across all gens. They always require a
|
|
|
|
* single 4k allocation, as well as a DMA mapping. If we keep the structs
|
|
|
|
* symmetric here, the simple macro covers us for every page table type.
|
|
|
|
*
|
|
|
|
* Return: 0 on success, -ENOMEM if the DMA mapping failed.
|
|
|
|
*/
|
|
|
|
#define i915_dma_map_single(px, dev) \
|
|
|
|
i915_dma_map_page_single((px)->page, (dev), &(px)->daddr)
|
|
|
|
|
2015-04-14 23:35:26 +08:00
|
|
|
static int i915_dma_map_page_single(struct page *page,
|
|
|
|
struct drm_device *dev,
|
|
|
|
dma_addr_t *daddr)
|
2015-03-17 00:00:56 +08:00
|
|
|
{
|
|
|
|
struct device *device = &dev->pdev->dev;
|
|
|
|
|
|
|
|
*daddr = dma_map_page(device, page, 0, 4096, PCI_DMA_BIDIRECTIONAL);
|
2015-03-25 01:06:33 +08:00
|
|
|
if (dma_mapping_error(device, *daddr))
|
|
|
|
return -ENOMEM;
|
|
|
|
|
|
|
|
return 0;
|
2015-03-17 00:00:56 +08:00
|
|
|
}
|
|
|
|
|
2015-04-08 19:13:23 +08:00
|
|
|
static void unmap_and_free_pt(struct i915_page_table *pt,
|
2015-03-17 00:00:56 +08:00
|
|
|
struct drm_device *dev)
|
drm/i915: Create page table allocators
As we move toward dynamic page table allocation, it becomes much easier
to manage our data structures if break do things less coarsely by
breaking up all of our actions into individual tasks. This makes the
code easier to write, read, and verify.
Aside from the dissection of the allocation functions, the patch
statically allocates the page table structures without a page directory.
This remains the same for all platforms,
The patch itself should not have much functional difference. The primary
noticeable difference is the fact that page tables are no longer
allocated, but rather statically declared as part of the page directory.
This has non-zero overhead, but things gain additional complexity as a
result.
This patch exists for a few reasons:
1. Splitting out the functions allows easily combining GEN6 and GEN8
code. Page tables have no difference based on GEN8. As we'll see in a
future patch when we add the DMA mappings to the allocations, it
requires only one small change to make work, and error handling should
just fall into place.
2. Unless we always want to allocate all page tables under a given PDE,
we'll have to eventually break this up into an array of pointers (or
pointer to pointer).
3. Having the discrete functions is easier to review, and understand.
All allocations and frees now take place in just a couple of locations.
Reviewing, and catching leaks should be easy.
4. Less important: the GFP flags are confined to one location, which
makes playing around with such things trivial.
v2: Updated commit message to explain why this patch exists
v3: For lrc, s/pdp.page_directory[i].daddr/pdp.page_directory[i]->daddr/
v4: Renamed free_pt/pd_single functions to unmap_and_free_pt/pd (Daniel)
v5: Added additional safety checks in gen8 clear/free/unmap.
v6: Use WARN_ON and return -EINVAL in alloc_pt_range (Mika).
v7: Make err_out loop symmetrical to the way we allocate in
alloc_pt_range. Also s/page_tables/page_table and correct commit
message (Mika)
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Michel Thierry <michel.thierry@intel.com> (v3+)
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2015-02-25 00:22:36 +08:00
|
|
|
{
|
|
|
|
if (WARN_ON(!pt->page))
|
|
|
|
return;
|
2015-03-17 00:00:56 +08:00
|
|
|
|
|
|
|
i915_dma_unmap_single(pt, dev);
|
drm/i915: Create page table allocators
As we move toward dynamic page table allocation, it becomes much easier
to manage our data structures if break do things less coarsely by
breaking up all of our actions into individual tasks. This makes the
code easier to write, read, and verify.
Aside from the dissection of the allocation functions, the patch
statically allocates the page table structures without a page directory.
This remains the same for all platforms,
The patch itself should not have much functional difference. The primary
noticeable difference is the fact that page tables are no longer
allocated, but rather statically declared as part of the page directory.
This has non-zero overhead, but things gain additional complexity as a
result.
This patch exists for a few reasons:
1. Splitting out the functions allows easily combining GEN6 and GEN8
code. Page tables have no difference based on GEN8. As we'll see in a
future patch when we add the DMA mappings to the allocations, it
requires only one small change to make work, and error handling should
just fall into place.
2. Unless we always want to allocate all page tables under a given PDE,
we'll have to eventually break this up into an array of pointers (or
pointer to pointer).
3. Having the discrete functions is easier to review, and understand.
All allocations and frees now take place in just a couple of locations.
Reviewing, and catching leaks should be easy.
4. Less important: the GFP flags are confined to one location, which
makes playing around with such things trivial.
v2: Updated commit message to explain why this patch exists
v3: For lrc, s/pdp.page_directory[i].daddr/pdp.page_directory[i]->daddr/
v4: Renamed free_pt/pd_single functions to unmap_and_free_pt/pd (Daniel)
v5: Added additional safety checks in gen8 clear/free/unmap.
v6: Use WARN_ON and return -EINVAL in alloc_pt_range (Mika).
v7: Make err_out loop symmetrical to the way we allocate in
alloc_pt_range. Also s/page_tables/page_table and correct commit
message (Mika)
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Michel Thierry <michel.thierry@intel.com> (v3+)
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2015-02-25 00:22:36 +08:00
|
|
|
__free_page(pt->page);
|
2015-03-17 00:00:56 +08:00
|
|
|
kfree(pt->used_ptes);
|
drm/i915: Create page table allocators
As we move toward dynamic page table allocation, it becomes much easier
to manage our data structures if break do things less coarsely by
breaking up all of our actions into individual tasks. This makes the
code easier to write, read, and verify.
Aside from the dissection of the allocation functions, the patch
statically allocates the page table structures without a page directory.
This remains the same for all platforms,
The patch itself should not have much functional difference. The primary
noticeable difference is the fact that page tables are no longer
allocated, but rather statically declared as part of the page directory.
This has non-zero overhead, but things gain additional complexity as a
result.
This patch exists for a few reasons:
1. Splitting out the functions allows easily combining GEN6 and GEN8
code. Page tables have no difference based on GEN8. As we'll see in a
future patch when we add the DMA mappings to the allocations, it
requires only one small change to make work, and error handling should
just fall into place.
2. Unless we always want to allocate all page tables under a given PDE,
we'll have to eventually break this up into an array of pointers (or
pointer to pointer).
3. Having the discrete functions is easier to review, and understand.
All allocations and frees now take place in just a couple of locations.
Reviewing, and catching leaks should be easy.
4. Less important: the GFP flags are confined to one location, which
makes playing around with such things trivial.
v2: Updated commit message to explain why this patch exists
v3: For lrc, s/pdp.page_directory[i].daddr/pdp.page_directory[i]->daddr/
v4: Renamed free_pt/pd_single functions to unmap_and_free_pt/pd (Daniel)
v5: Added additional safety checks in gen8 clear/free/unmap.
v6: Use WARN_ON and return -EINVAL in alloc_pt_range (Mika).
v7: Make err_out loop symmetrical to the way we allocate in
alloc_pt_range. Also s/page_tables/page_table and correct commit
message (Mika)
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Michel Thierry <michel.thierry@intel.com> (v3+)
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2015-02-25 00:22:36 +08:00
|
|
|
kfree(pt);
|
|
|
|
}
|
|
|
|
|
2015-04-08 19:13:25 +08:00
|
|
|
static void gen8_initialize_pt(struct i915_address_space *vm,
|
2015-04-08 19:13:32 +08:00
|
|
|
struct i915_page_table *pt)
|
2015-04-08 19:13:25 +08:00
|
|
|
{
|
|
|
|
gen8_pte_t *pt_vaddr, scratch_pte;
|
|
|
|
int i;
|
|
|
|
|
|
|
|
pt_vaddr = kmap_atomic(pt->page);
|
|
|
|
scratch_pte = gen8_pte_encode(vm->scratch.addr,
|
|
|
|
I915_CACHE_LLC, true);
|
|
|
|
|
|
|
|
for (i = 0; i < GEN8_PTES; i++)
|
|
|
|
pt_vaddr[i] = scratch_pte;
|
|
|
|
|
|
|
|
if (!HAS_LLC(vm->dev))
|
|
|
|
drm_clflush_virt_range(pt_vaddr, PAGE_SIZE);
|
|
|
|
kunmap_atomic(pt_vaddr);
|
|
|
|
}
|
|
|
|
|
2015-04-08 19:13:23 +08:00
|
|
|
static struct i915_page_table *alloc_pt_single(struct drm_device *dev)
|
drm/i915: Create page table allocators
As we move toward dynamic page table allocation, it becomes much easier
to manage our data structures if break do things less coarsely by
breaking up all of our actions into individual tasks. This makes the
code easier to write, read, and verify.
Aside from the dissection of the allocation functions, the patch
statically allocates the page table structures without a page directory.
This remains the same for all platforms,
The patch itself should not have much functional difference. The primary
noticeable difference is the fact that page tables are no longer
allocated, but rather statically declared as part of the page directory.
This has non-zero overhead, but things gain additional complexity as a
result.
This patch exists for a few reasons:
1. Splitting out the functions allows easily combining GEN6 and GEN8
code. Page tables have no difference based on GEN8. As we'll see in a
future patch when we add the DMA mappings to the allocations, it
requires only one small change to make work, and error handling should
just fall into place.
2. Unless we always want to allocate all page tables under a given PDE,
we'll have to eventually break this up into an array of pointers (or
pointer to pointer).
3. Having the discrete functions is easier to review, and understand.
All allocations and frees now take place in just a couple of locations.
Reviewing, and catching leaks should be easy.
4. Less important: the GFP flags are confined to one location, which
makes playing around with such things trivial.
v2: Updated commit message to explain why this patch exists
v3: For lrc, s/pdp.page_directory[i].daddr/pdp.page_directory[i]->daddr/
v4: Renamed free_pt/pd_single functions to unmap_and_free_pt/pd (Daniel)
v5: Added additional safety checks in gen8 clear/free/unmap.
v6: Use WARN_ON and return -EINVAL in alloc_pt_range (Mika).
v7: Make err_out loop symmetrical to the way we allocate in
alloc_pt_range. Also s/page_tables/page_table and correct commit
message (Mika)
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Michel Thierry <michel.thierry@intel.com> (v3+)
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2015-02-25 00:22:36 +08:00
|
|
|
{
|
2015-04-08 19:13:23 +08:00
|
|
|
struct i915_page_table *pt;
|
2015-03-17 00:00:56 +08:00
|
|
|
const size_t count = INTEL_INFO(dev)->gen >= 8 ?
|
|
|
|
GEN8_PTES : GEN6_PTES;
|
|
|
|
int ret = -ENOMEM;
|
drm/i915: Create page table allocators
As we move toward dynamic page table allocation, it becomes much easier
to manage our data structures if break do things less coarsely by
breaking up all of our actions into individual tasks. This makes the
code easier to write, read, and verify.
Aside from the dissection of the allocation functions, the patch
statically allocates the page table structures without a page directory.
This remains the same for all platforms,
The patch itself should not have much functional difference. The primary
noticeable difference is the fact that page tables are no longer
allocated, but rather statically declared as part of the page directory.
This has non-zero overhead, but things gain additional complexity as a
result.
This patch exists for a few reasons:
1. Splitting out the functions allows easily combining GEN6 and GEN8
code. Page tables have no difference based on GEN8. As we'll see in a
future patch when we add the DMA mappings to the allocations, it
requires only one small change to make work, and error handling should
just fall into place.
2. Unless we always want to allocate all page tables under a given PDE,
we'll have to eventually break this up into an array of pointers (or
pointer to pointer).
3. Having the discrete functions is easier to review, and understand.
All allocations and frees now take place in just a couple of locations.
Reviewing, and catching leaks should be easy.
4. Less important: the GFP flags are confined to one location, which
makes playing around with such things trivial.
v2: Updated commit message to explain why this patch exists
v3: For lrc, s/pdp.page_directory[i].daddr/pdp.page_directory[i]->daddr/
v4: Renamed free_pt/pd_single functions to unmap_and_free_pt/pd (Daniel)
v5: Added additional safety checks in gen8 clear/free/unmap.
v6: Use WARN_ON and return -EINVAL in alloc_pt_range (Mika).
v7: Make err_out loop symmetrical to the way we allocate in
alloc_pt_range. Also s/page_tables/page_table and correct commit
message (Mika)
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Michel Thierry <michel.thierry@intel.com> (v3+)
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2015-02-25 00:22:36 +08:00
|
|
|
|
|
|
|
pt = kzalloc(sizeof(*pt), GFP_KERNEL);
|
|
|
|
if (!pt)
|
|
|
|
return ERR_PTR(-ENOMEM);
|
|
|
|
|
2015-03-17 00:00:56 +08:00
|
|
|
pt->used_ptes = kcalloc(BITS_TO_LONGS(count), sizeof(*pt->used_ptes),
|
|
|
|
GFP_KERNEL);
|
|
|
|
|
|
|
|
if (!pt->used_ptes)
|
|
|
|
goto fail_bitmap;
|
|
|
|
|
drm/i915: Finish gen6/7 dynamic page table allocation
This patch continues on the idea from "Track GEN6 page table usage".
From here on, in the steady state, PDEs are all pointing to the scratch
page table (as recommended in the spec). When an object is allocated in
the VA range, the code will determine if we need to allocate a page for
the page table. Similarly when the object is destroyed, we will remove,
and free the page table pointing the PDE back to the scratch page.
Following patches will work to unify the code a bit as we bring in GEN8
support. GEN6 and GEN8 are different enough that I had a hard time to
get to this point with as much common code as I do.
The aliasing PPGTT must pre-allocate all of the page tables. There are a
few reasons for this. Two trivial ones: aliasing ppgtt goes through the
ggtt paths, so it's hard to maintain, we currently do not restore the
default context (assuming the previous force reload is indeed
necessary). Most importantly though, the only way (it seems from
empirical evidence) to invalidate the CS TLBs on non-render ring is to
either use ring sync (which requires actually stopping the rings in
order to synchronize when the sync completes vs. where you are in
execution), or to reload DCLV. Since without full PPGTT we do not ever
reload the DCLV register, there is no good way to achieve this. The
simplest solution is just to not support dynamic page table
creation/destruction in the aliasing PPGTT.
We could always reload DCLV, but this seems like quite a bit of excess
overhead only to save at most 2MB-4k of memory for the aliasing PPGTT
page tables.
v2: Make the page table bitmap declared inside the function (Chris)
Simplify the way scratching address space works.
Move the alloc/teardown tracepoints up a level in the call stack so that
both all implementations get the trace.
v3: Updated trace event to spit out a name
v4: Aliasing ppgtt is now initialized differently (in setup global gtt)
v5: Rebase to latest code. Also removed unnecessary aliasing ppgtt check
for trace, as it is no longer possible after the PPGTT cleanup patch series
of a couple of months ago (Daniel).
v6: Implement changes from code review (Daniel):
- allocate/teardown_va_range calls added.
- Add a scratch page allocation helper (only need the address).
- Move trace events to a new patch.
- Use updated mark_tlbs_dirty.
- Moved pt preallocation for aliasing ppgtt into gen6_ppgtt_init.
v7: teardown_va_range removed (Daniel).
In init, gen6_ppgtt_clear_range call is only needed for aliasing ppgtt.
v8: Rebase after s/page_tables/page_table/.
v9: Remove unnecessary scratch flag in page_table struct, future patches
can just compare against ppgtt->scratch_pt, and alloc_pt_scratch becomes
redundant. Initialize scratch_pt and pt. (Mika)
v10: Clean up aliasing ppgtt init error path and prevent leaking the
ppgtt obj when init fails. (Mika)
Updated commit author. (Daniel)
Cc: Mika Kuoppala <mika.kuoppala@intel.com>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Michel Thierry <michel.thierry@intel.com> (v4+)
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2015-03-24 23:46:22 +08:00
|
|
|
pt->page = alloc_page(GFP_KERNEL);
|
2015-03-17 00:00:56 +08:00
|
|
|
if (!pt->page)
|
|
|
|
goto fail_page;
|
|
|
|
|
|
|
|
ret = i915_dma_map_single(pt, dev);
|
|
|
|
if (ret)
|
|
|
|
goto fail_dma;
|
drm/i915: Create page table allocators
As we move toward dynamic page table allocation, it becomes much easier
to manage our data structures if break do things less coarsely by
breaking up all of our actions into individual tasks. This makes the
code easier to write, read, and verify.
Aside from the dissection of the allocation functions, the patch
statically allocates the page table structures without a page directory.
This remains the same for all platforms,
The patch itself should not have much functional difference. The primary
noticeable difference is the fact that page tables are no longer
allocated, but rather statically declared as part of the page directory.
This has non-zero overhead, but things gain additional complexity as a
result.
This patch exists for a few reasons:
1. Splitting out the functions allows easily combining GEN6 and GEN8
code. Page tables have no difference based on GEN8. As we'll see in a
future patch when we add the DMA mappings to the allocations, it
requires only one small change to make work, and error handling should
just fall into place.
2. Unless we always want to allocate all page tables under a given PDE,
we'll have to eventually break this up into an array of pointers (or
pointer to pointer).
3. Having the discrete functions is easier to review, and understand.
All allocations and frees now take place in just a couple of locations.
Reviewing, and catching leaks should be easy.
4. Less important: the GFP flags are confined to one location, which
makes playing around with such things trivial.
v2: Updated commit message to explain why this patch exists
v3: For lrc, s/pdp.page_directory[i].daddr/pdp.page_directory[i]->daddr/
v4: Renamed free_pt/pd_single functions to unmap_and_free_pt/pd (Daniel)
v5: Added additional safety checks in gen8 clear/free/unmap.
v6: Use WARN_ON and return -EINVAL in alloc_pt_range (Mika).
v7: Make err_out loop symmetrical to the way we allocate in
alloc_pt_range. Also s/page_tables/page_table and correct commit
message (Mika)
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Michel Thierry <michel.thierry@intel.com> (v3+)
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2015-02-25 00:22:36 +08:00
|
|
|
|
|
|
|
return pt;
|
2015-03-17 00:00:56 +08:00
|
|
|
|
|
|
|
fail_dma:
|
|
|
|
__free_page(pt->page);
|
|
|
|
fail_page:
|
|
|
|
kfree(pt->used_ptes);
|
|
|
|
fail_bitmap:
|
|
|
|
kfree(pt);
|
|
|
|
|
|
|
|
return ERR_PTR(ret);
|
drm/i915: Create page table allocators
As we move toward dynamic page table allocation, it becomes much easier
to manage our data structures if break do things less coarsely by
breaking up all of our actions into individual tasks. This makes the
code easier to write, read, and verify.
Aside from the dissection of the allocation functions, the patch
statically allocates the page table structures without a page directory.
This remains the same for all platforms,
The patch itself should not have much functional difference. The primary
noticeable difference is the fact that page tables are no longer
allocated, but rather statically declared as part of the page directory.
This has non-zero overhead, but things gain additional complexity as a
result.
This patch exists for a few reasons:
1. Splitting out the functions allows easily combining GEN6 and GEN8
code. Page tables have no difference based on GEN8. As we'll see in a
future patch when we add the DMA mappings to the allocations, it
requires only one small change to make work, and error handling should
just fall into place.
2. Unless we always want to allocate all page tables under a given PDE,
we'll have to eventually break this up into an array of pointers (or
pointer to pointer).
3. Having the discrete functions is easier to review, and understand.
All allocations and frees now take place in just a couple of locations.
Reviewing, and catching leaks should be easy.
4. Less important: the GFP flags are confined to one location, which
makes playing around with such things trivial.
v2: Updated commit message to explain why this patch exists
v3: For lrc, s/pdp.page_directory[i].daddr/pdp.page_directory[i]->daddr/
v4: Renamed free_pt/pd_single functions to unmap_and_free_pt/pd (Daniel)
v5: Added additional safety checks in gen8 clear/free/unmap.
v6: Use WARN_ON and return -EINVAL in alloc_pt_range (Mika).
v7: Make err_out loop symmetrical to the way we allocate in
alloc_pt_range. Also s/page_tables/page_table and correct commit
message (Mika)
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Michel Thierry <michel.thierry@intel.com> (v3+)
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2015-02-25 00:22:36 +08:00
|
|
|
}
|
|
|
|
|
2015-04-08 19:13:32 +08:00
|
|
|
static void unmap_and_free_pd(struct i915_page_directory *pd,
|
|
|
|
struct drm_device *dev)
|
drm/i915: Create page table allocators
As we move toward dynamic page table allocation, it becomes much easier
to manage our data structures if break do things less coarsely by
breaking up all of our actions into individual tasks. This makes the
code easier to write, read, and verify.
Aside from the dissection of the allocation functions, the patch
statically allocates the page table structures without a page directory.
This remains the same for all platforms,
The patch itself should not have much functional difference. The primary
noticeable difference is the fact that page tables are no longer
allocated, but rather statically declared as part of the page directory.
This has non-zero overhead, but things gain additional complexity as a
result.
This patch exists for a few reasons:
1. Splitting out the functions allows easily combining GEN6 and GEN8
code. Page tables have no difference based on GEN8. As we'll see in a
future patch when we add the DMA mappings to the allocations, it
requires only one small change to make work, and error handling should
just fall into place.
2. Unless we always want to allocate all page tables under a given PDE,
we'll have to eventually break this up into an array of pointers (or
pointer to pointer).
3. Having the discrete functions is easier to review, and understand.
All allocations and frees now take place in just a couple of locations.
Reviewing, and catching leaks should be easy.
4. Less important: the GFP flags are confined to one location, which
makes playing around with such things trivial.
v2: Updated commit message to explain why this patch exists
v3: For lrc, s/pdp.page_directory[i].daddr/pdp.page_directory[i]->daddr/
v4: Renamed free_pt/pd_single functions to unmap_and_free_pt/pd (Daniel)
v5: Added additional safety checks in gen8 clear/free/unmap.
v6: Use WARN_ON and return -EINVAL in alloc_pt_range (Mika).
v7: Make err_out loop symmetrical to the way we allocate in
alloc_pt_range. Also s/page_tables/page_table and correct commit
message (Mika)
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Michel Thierry <michel.thierry@intel.com> (v3+)
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2015-02-25 00:22:36 +08:00
|
|
|
{
|
|
|
|
if (pd->page) {
|
2015-04-08 19:13:32 +08:00
|
|
|
i915_dma_unmap_single(pd, dev);
|
drm/i915: Create page table allocators
As we move toward dynamic page table allocation, it becomes much easier
to manage our data structures if break do things less coarsely by
breaking up all of our actions into individual tasks. This makes the
code easier to write, read, and verify.
Aside from the dissection of the allocation functions, the patch
statically allocates the page table structures without a page directory.
This remains the same for all platforms,
The patch itself should not have much functional difference. The primary
noticeable difference is the fact that page tables are no longer
allocated, but rather statically declared as part of the page directory.
This has non-zero overhead, but things gain additional complexity as a
result.
This patch exists for a few reasons:
1. Splitting out the functions allows easily combining GEN6 and GEN8
code. Page tables have no difference based on GEN8. As we'll see in a
future patch when we add the DMA mappings to the allocations, it
requires only one small change to make work, and error handling should
just fall into place.
2. Unless we always want to allocate all page tables under a given PDE,
we'll have to eventually break this up into an array of pointers (or
pointer to pointer).
3. Having the discrete functions is easier to review, and understand.
All allocations and frees now take place in just a couple of locations.
Reviewing, and catching leaks should be easy.
4. Less important: the GFP flags are confined to one location, which
makes playing around with such things trivial.
v2: Updated commit message to explain why this patch exists
v3: For lrc, s/pdp.page_directory[i].daddr/pdp.page_directory[i]->daddr/
v4: Renamed free_pt/pd_single functions to unmap_and_free_pt/pd (Daniel)
v5: Added additional safety checks in gen8 clear/free/unmap.
v6: Use WARN_ON and return -EINVAL in alloc_pt_range (Mika).
v7: Make err_out loop symmetrical to the way we allocate in
alloc_pt_range. Also s/page_tables/page_table and correct commit
message (Mika)
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Michel Thierry <michel.thierry@intel.com> (v3+)
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2015-02-25 00:22:36 +08:00
|
|
|
__free_page(pd->page);
|
2015-04-08 19:13:33 +08:00
|
|
|
kfree(pd->used_pdes);
|
drm/i915: Create page table allocators
As we move toward dynamic page table allocation, it becomes much easier
to manage our data structures if break do things less coarsely by
breaking up all of our actions into individual tasks. This makes the
code easier to write, read, and verify.
Aside from the dissection of the allocation functions, the patch
statically allocates the page table structures without a page directory.
This remains the same for all platforms,
The patch itself should not have much functional difference. The primary
noticeable difference is the fact that page tables are no longer
allocated, but rather statically declared as part of the page directory.
This has non-zero overhead, but things gain additional complexity as a
result.
This patch exists for a few reasons:
1. Splitting out the functions allows easily combining GEN6 and GEN8
code. Page tables have no difference based on GEN8. As we'll see in a
future patch when we add the DMA mappings to the allocations, it
requires only one small change to make work, and error handling should
just fall into place.
2. Unless we always want to allocate all page tables under a given PDE,
we'll have to eventually break this up into an array of pointers (or
pointer to pointer).
3. Having the discrete functions is easier to review, and understand.
All allocations and frees now take place in just a couple of locations.
Reviewing, and catching leaks should be easy.
4. Less important: the GFP flags are confined to one location, which
makes playing around with such things trivial.
v2: Updated commit message to explain why this patch exists
v3: For lrc, s/pdp.page_directory[i].daddr/pdp.page_directory[i]->daddr/
v4: Renamed free_pt/pd_single functions to unmap_and_free_pt/pd (Daniel)
v5: Added additional safety checks in gen8 clear/free/unmap.
v6: Use WARN_ON and return -EINVAL in alloc_pt_range (Mika).
v7: Make err_out loop symmetrical to the way we allocate in
alloc_pt_range. Also s/page_tables/page_table and correct commit
message (Mika)
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Michel Thierry <michel.thierry@intel.com> (v3+)
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2015-02-25 00:22:36 +08:00
|
|
|
kfree(pd);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-04-08 19:13:32 +08:00
|
|
|
static struct i915_page_directory *alloc_pd_single(struct drm_device *dev)
|
drm/i915: Create page table allocators
As we move toward dynamic page table allocation, it becomes much easier
to manage our data structures if break do things less coarsely by
breaking up all of our actions into individual tasks. This makes the
code easier to write, read, and verify.
Aside from the dissection of the allocation functions, the patch
statically allocates the page table structures without a page directory.
This remains the same for all platforms,
The patch itself should not have much functional difference. The primary
noticeable difference is the fact that page tables are no longer
allocated, but rather statically declared as part of the page directory.
This has non-zero overhead, but things gain additional complexity as a
result.
This patch exists for a few reasons:
1. Splitting out the functions allows easily combining GEN6 and GEN8
code. Page tables have no difference based on GEN8. As we'll see in a
future patch when we add the DMA mappings to the allocations, it
requires only one small change to make work, and error handling should
just fall into place.
2. Unless we always want to allocate all page tables under a given PDE,
we'll have to eventually break this up into an array of pointers (or
pointer to pointer).
3. Having the discrete functions is easier to review, and understand.
All allocations and frees now take place in just a couple of locations.
Reviewing, and catching leaks should be easy.
4. Less important: the GFP flags are confined to one location, which
makes playing around with such things trivial.
v2: Updated commit message to explain why this patch exists
v3: For lrc, s/pdp.page_directory[i].daddr/pdp.page_directory[i]->daddr/
v4: Renamed free_pt/pd_single functions to unmap_and_free_pt/pd (Daniel)
v5: Added additional safety checks in gen8 clear/free/unmap.
v6: Use WARN_ON and return -EINVAL in alloc_pt_range (Mika).
v7: Make err_out loop symmetrical to the way we allocate in
alloc_pt_range. Also s/page_tables/page_table and correct commit
message (Mika)
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Michel Thierry <michel.thierry@intel.com> (v3+)
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2015-02-25 00:22:36 +08:00
|
|
|
{
|
2015-04-08 19:13:23 +08:00
|
|
|
struct i915_page_directory *pd;
|
2015-04-08 19:13:33 +08:00
|
|
|
int ret = -ENOMEM;
|
drm/i915: Create page table allocators
As we move toward dynamic page table allocation, it becomes much easier
to manage our data structures if break do things less coarsely by
breaking up all of our actions into individual tasks. This makes the
code easier to write, read, and verify.
Aside from the dissection of the allocation functions, the patch
statically allocates the page table structures without a page directory.
This remains the same for all platforms,
The patch itself should not have much functional difference. The primary
noticeable difference is the fact that page tables are no longer
allocated, but rather statically declared as part of the page directory.
This has non-zero overhead, but things gain additional complexity as a
result.
This patch exists for a few reasons:
1. Splitting out the functions allows easily combining GEN6 and GEN8
code. Page tables have no difference based on GEN8. As we'll see in a
future patch when we add the DMA mappings to the allocations, it
requires only one small change to make work, and error handling should
just fall into place.
2. Unless we always want to allocate all page tables under a given PDE,
we'll have to eventually break this up into an array of pointers (or
pointer to pointer).
3. Having the discrete functions is easier to review, and understand.
All allocations and frees now take place in just a couple of locations.
Reviewing, and catching leaks should be easy.
4. Less important: the GFP flags are confined to one location, which
makes playing around with such things trivial.
v2: Updated commit message to explain why this patch exists
v3: For lrc, s/pdp.page_directory[i].daddr/pdp.page_directory[i]->daddr/
v4: Renamed free_pt/pd_single functions to unmap_and_free_pt/pd (Daniel)
v5: Added additional safety checks in gen8 clear/free/unmap.
v6: Use WARN_ON and return -EINVAL in alloc_pt_range (Mika).
v7: Make err_out loop symmetrical to the way we allocate in
alloc_pt_range. Also s/page_tables/page_table and correct commit
message (Mika)
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Michel Thierry <michel.thierry@intel.com> (v3+)
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2015-02-25 00:22:36 +08:00
|
|
|
|
|
|
|
pd = kzalloc(sizeof(*pd), GFP_KERNEL);
|
|
|
|
if (!pd)
|
|
|
|
return ERR_PTR(-ENOMEM);
|
|
|
|
|
2015-04-08 19:13:33 +08:00
|
|
|
pd->used_pdes = kcalloc(BITS_TO_LONGS(I915_PDES),
|
|
|
|
sizeof(*pd->used_pdes), GFP_KERNEL);
|
|
|
|
if (!pd->used_pdes)
|
|
|
|
goto free_pd;
|
|
|
|
|
2015-04-08 19:13:25 +08:00
|
|
|
pd->page = alloc_page(GFP_KERNEL);
|
2015-04-08 19:13:33 +08:00
|
|
|
if (!pd->page)
|
|
|
|
goto free_bitmap;
|
drm/i915: Create page table allocators
As we move toward dynamic page table allocation, it becomes much easier
to manage our data structures if break do things less coarsely by
breaking up all of our actions into individual tasks. This makes the
code easier to write, read, and verify.
Aside from the dissection of the allocation functions, the patch
statically allocates the page table structures without a page directory.
This remains the same for all platforms,
The patch itself should not have much functional difference. The primary
noticeable difference is the fact that page tables are no longer
allocated, but rather statically declared as part of the page directory.
This has non-zero overhead, but things gain additional complexity as a
result.
This patch exists for a few reasons:
1. Splitting out the functions allows easily combining GEN6 and GEN8
code. Page tables have no difference based on GEN8. As we'll see in a
future patch when we add the DMA mappings to the allocations, it
requires only one small change to make work, and error handling should
just fall into place.
2. Unless we always want to allocate all page tables under a given PDE,
we'll have to eventually break this up into an array of pointers (or
pointer to pointer).
3. Having the discrete functions is easier to review, and understand.
All allocations and frees now take place in just a couple of locations.
Reviewing, and catching leaks should be easy.
4. Less important: the GFP flags are confined to one location, which
makes playing around with such things trivial.
v2: Updated commit message to explain why this patch exists
v3: For lrc, s/pdp.page_directory[i].daddr/pdp.page_directory[i]->daddr/
v4: Renamed free_pt/pd_single functions to unmap_and_free_pt/pd (Daniel)
v5: Added additional safety checks in gen8 clear/free/unmap.
v6: Use WARN_ON and return -EINVAL in alloc_pt_range (Mika).
v7: Make err_out loop symmetrical to the way we allocate in
alloc_pt_range. Also s/page_tables/page_table and correct commit
message (Mika)
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Michel Thierry <michel.thierry@intel.com> (v3+)
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2015-02-25 00:22:36 +08:00
|
|
|
|
2015-04-08 19:13:32 +08:00
|
|
|
ret = i915_dma_map_single(pd, dev);
|
2015-04-08 19:13:33 +08:00
|
|
|
if (ret)
|
|
|
|
goto free_page;
|
2015-04-08 19:13:32 +08:00
|
|
|
|
drm/i915: Create page table allocators
As we move toward dynamic page table allocation, it becomes much easier
to manage our data structures if break do things less coarsely by
breaking up all of our actions into individual tasks. This makes the
code easier to write, read, and verify.
Aside from the dissection of the allocation functions, the patch
statically allocates the page table structures without a page directory.
This remains the same for all platforms,
The patch itself should not have much functional difference. The primary
noticeable difference is the fact that page tables are no longer
allocated, but rather statically declared as part of the page directory.
This has non-zero overhead, but things gain additional complexity as a
result.
This patch exists for a few reasons:
1. Splitting out the functions allows easily combining GEN6 and GEN8
code. Page tables have no difference based on GEN8. As we'll see in a
future patch when we add the DMA mappings to the allocations, it
requires only one small change to make work, and error handling should
just fall into place.
2. Unless we always want to allocate all page tables under a given PDE,
we'll have to eventually break this up into an array of pointers (or
pointer to pointer).
3. Having the discrete functions is easier to review, and understand.
All allocations and frees now take place in just a couple of locations.
Reviewing, and catching leaks should be easy.
4. Less important: the GFP flags are confined to one location, which
makes playing around with such things trivial.
v2: Updated commit message to explain why this patch exists
v3: For lrc, s/pdp.page_directory[i].daddr/pdp.page_directory[i]->daddr/
v4: Renamed free_pt/pd_single functions to unmap_and_free_pt/pd (Daniel)
v5: Added additional safety checks in gen8 clear/free/unmap.
v6: Use WARN_ON and return -EINVAL in alloc_pt_range (Mika).
v7: Make err_out loop symmetrical to the way we allocate in
alloc_pt_range. Also s/page_tables/page_table and correct commit
message (Mika)
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Michel Thierry <michel.thierry@intel.com> (v3+)
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2015-02-25 00:22:36 +08:00
|
|
|
return pd;
|
2015-04-08 19:13:33 +08:00
|
|
|
|
|
|
|
free_page:
|
|
|
|
__free_page(pd->page);
|
|
|
|
free_bitmap:
|
|
|
|
kfree(pd->used_pdes);
|
|
|
|
free_pd:
|
|
|
|
kfree(pd);
|
|
|
|
|
|
|
|
return ERR_PTR(ret);
|
drm/i915: Create page table allocators
As we move toward dynamic page table allocation, it becomes much easier
to manage our data structures if break do things less coarsely by
breaking up all of our actions into individual tasks. This makes the
code easier to write, read, and verify.
Aside from the dissection of the allocation functions, the patch
statically allocates the page table structures without a page directory.
This remains the same for all platforms,
The patch itself should not have much functional difference. The primary
noticeable difference is the fact that page tables are no longer
allocated, but rather statically declared as part of the page directory.
This has non-zero overhead, but things gain additional complexity as a
result.
This patch exists for a few reasons:
1. Splitting out the functions allows easily combining GEN6 and GEN8
code. Page tables have no difference based on GEN8. As we'll see in a
future patch when we add the DMA mappings to the allocations, it
requires only one small change to make work, and error handling should
just fall into place.
2. Unless we always want to allocate all page tables under a given PDE,
we'll have to eventually break this up into an array of pointers (or
pointer to pointer).
3. Having the discrete functions is easier to review, and understand.
All allocations and frees now take place in just a couple of locations.
Reviewing, and catching leaks should be easy.
4. Less important: the GFP flags are confined to one location, which
makes playing around with such things trivial.
v2: Updated commit message to explain why this patch exists
v3: For lrc, s/pdp.page_directory[i].daddr/pdp.page_directory[i]->daddr/
v4: Renamed free_pt/pd_single functions to unmap_and_free_pt/pd (Daniel)
v5: Added additional safety checks in gen8 clear/free/unmap.
v6: Use WARN_ON and return -EINVAL in alloc_pt_range (Mika).
v7: Make err_out loop symmetrical to the way we allocate in
alloc_pt_range. Also s/page_tables/page_table and correct commit
message (Mika)
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Michel Thierry <michel.thierry@intel.com> (v3+)
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2015-02-25 00:22:36 +08:00
|
|
|
}
|
|
|
|
|
2013-11-05 14:29:36 +08:00
|
|
|
/* Broadwell Page Directory Pointer Descriptors */
|
2015-04-08 19:13:29 +08:00
|
|
|
static int gen8_write_pdp(struct intel_engine_cs *ring,
|
|
|
|
unsigned entry,
|
|
|
|
dma_addr_t addr)
|
2013-11-05 14:29:36 +08:00
|
|
|
{
|
|
|
|
int ret;
|
|
|
|
|
|
|
|
BUG_ON(entry >= 4);
|
|
|
|
|
|
|
|
ret = intel_ring_begin(ring, 6);
|
|
|
|
if (ret)
|
|
|
|
return ret;
|
|
|
|
|
|
|
|
intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
|
|
|
|
intel_ring_emit(ring, GEN8_RING_PDP_UDW(ring, entry));
|
2015-04-08 19:13:29 +08:00
|
|
|
intel_ring_emit(ring, upper_32_bits(addr));
|
2013-11-05 14:29:36 +08:00
|
|
|
intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
|
|
|
|
intel_ring_emit(ring, GEN8_RING_PDP_LDW(ring, entry));
|
2015-04-08 19:13:29 +08:00
|
|
|
intel_ring_emit(ring, lower_32_bits(addr));
|
2013-11-05 14:29:36 +08:00
|
|
|
intel_ring_advance(ring);
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2013-12-07 06:11:10 +08:00
|
|
|
static int gen8_mm_switch(struct i915_hw_ppgtt *ppgtt,
|
2014-08-16 01:51:35 +08:00
|
|
|
struct intel_engine_cs *ring)
|
2013-11-05 14:29:36 +08:00
|
|
|
{
|
2013-12-07 06:11:10 +08:00
|
|
|
int i, ret;
|
2013-11-05 14:29:36 +08:00
|
|
|
|
2015-04-08 19:13:29 +08:00
|
|
|
for (i = GEN8_LEGACY_PDPES - 1; i >= 0; i--) {
|
|
|
|
struct i915_page_directory *pd = ppgtt->pdp.page_directory[i];
|
|
|
|
dma_addr_t pd_daddr = pd ? pd->daddr : ppgtt->scratch_pd->daddr;
|
|
|
|
/* The page directory might be NULL, but we need to clear out
|
|
|
|
* whatever the previous context might have used. */
|
|
|
|
ret = gen8_write_pdp(ring, i, pd_daddr);
|
2013-12-07 06:11:10 +08:00
|
|
|
if (ret)
|
|
|
|
return ret;
|
2013-11-05 14:29:36 +08:00
|
|
|
}
|
2013-11-26 01:54:32 +08:00
|
|
|
|
2013-12-07 06:11:10 +08:00
|
|
|
return 0;
|
2013-11-05 14:29:36 +08:00
|
|
|
}
|
|
|
|
|
2013-11-03 12:07:23 +08:00
|
|
|
static void gen8_ppgtt_clear_range(struct i915_address_space *vm,
|
2014-02-21 03:50:33 +08:00
|
|
|
uint64_t start,
|
|
|
|
uint64_t length,
|
2013-11-03 12:07:23 +08:00
|
|
|
bool use_scratch)
|
|
|
|
{
|
|
|
|
struct i915_hw_ppgtt *ppgtt =
|
|
|
|
container_of(vm, struct i915_hw_ppgtt, base);
|
2015-03-17 00:00:54 +08:00
|
|
|
gen8_pte_t *pt_vaddr, scratch_pte;
|
drm/i915/bdw: Reorganize PT allocations
The previous allocation mechanism would get 2 contiguous allocations,
one for the page directories, and one for the page tables. As each page
table is 1 page, and there are 512 of these per page directory, this
goes to 2MB. An unfriendly request at best. Worse still, our HW now
supports 4 page directories, and a 2MB allocation is not allowed.
In order to fix this, this patch attempts to split up each page table
allocation into a single, discrete allocation. There is nothing really
fancy about the patch itself, it just has to manage an extra pointer
indirection, and have a fancier bit of logic to free up the pages.
To accommodate some of the added complexity, two new helpers are
introduced to allocate, and free the page table pages.
NOTE: I really wanted to split the way we do allocations, and the way in
which we identify the page table/page directory being used. I found
splitting this functionality up to be too unwieldy. I apologize in
advance to the reviewer. I'd recommend looking at the result, rather
than the diff.
v2/NOTE2: This patch predated commit:
6f1cc993518462ccf039e195fabd47e7aa5bfd13
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date: Tue Dec 31 15:50:31 2013 +0000
drm/i915: Avoid dereference past end of page arr
It fixed the same issue as that patch, but because of the limbo state of
PPGTT, Chris patch was merged instead. The excess churn is a result of
my using my original patch, which has my preferred naming. Primarily
act_* is changed to which_*, but it's mostly the same otherwise. I've
kept the convention Chris used for the pte wrap (I had something
slightly different, and broken - but fixable)
v3: Rename which_p[..]e to drop which_ (Chris)
Remove BUG_ON in inner loop (Chris)
Redo the pde/pdpe wrap logic (Chris)
v4: s/1MB/2MB in commit message (Imre)
Plug leaking gen8_pt_pages in both the error path, as well as general
free case (Imre)
v5: Rename leftover "which_" variables (Imre)
Add the pde = 0 wrap that was missed from v3 (Imre)
Reviewed-by: Imre Deak <imre.deak@intel.com>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
[danvet: Squash in fixup from Ben.]
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2014-02-21 03:51:21 +08:00
|
|
|
unsigned pdpe = start >> GEN8_PDPE_SHIFT & GEN8_PDPE_MASK;
|
|
|
|
unsigned pde = start >> GEN8_PDE_SHIFT & GEN8_PDE_MASK;
|
|
|
|
unsigned pte = start >> GEN8_PTE_SHIFT & GEN8_PTE_MASK;
|
2014-02-21 03:50:33 +08:00
|
|
|
unsigned num_entries = length >> PAGE_SHIFT;
|
2013-11-03 12:07:23 +08:00
|
|
|
unsigned last_pte, i;
|
|
|
|
|
|
|
|
scratch_pte = gen8_pte_encode(ppgtt->base.scratch.addr,
|
|
|
|
I915_CACHE_LLC, use_scratch);
|
|
|
|
|
|
|
|
while (num_entries) {
|
2015-04-08 19:13:23 +08:00
|
|
|
struct i915_page_directory *pd;
|
|
|
|
struct i915_page_table *pt;
|
drm/i915: Create page table allocators
As we move toward dynamic page table allocation, it becomes much easier
to manage our data structures if break do things less coarsely by
breaking up all of our actions into individual tasks. This makes the
code easier to write, read, and verify.
Aside from the dissection of the allocation functions, the patch
statically allocates the page table structures without a page directory.
This remains the same for all platforms,
The patch itself should not have much functional difference. The primary
noticeable difference is the fact that page tables are no longer
allocated, but rather statically declared as part of the page directory.
This has non-zero overhead, but things gain additional complexity as a
result.
This patch exists for a few reasons:
1. Splitting out the functions allows easily combining GEN6 and GEN8
code. Page tables have no difference based on GEN8. As we'll see in a
future patch when we add the DMA mappings to the allocations, it
requires only one small change to make work, and error handling should
just fall into place.
2. Unless we always want to allocate all page tables under a given PDE,
we'll have to eventually break this up into an array of pointers (or
pointer to pointer).
3. Having the discrete functions is easier to review, and understand.
All allocations and frees now take place in just a couple of locations.
Reviewing, and catching leaks should be easy.
4. Less important: the GFP flags are confined to one location, which
makes playing around with such things trivial.
v2: Updated commit message to explain why this patch exists
v3: For lrc, s/pdp.page_directory[i].daddr/pdp.page_directory[i]->daddr/
v4: Renamed free_pt/pd_single functions to unmap_and_free_pt/pd (Daniel)
v5: Added additional safety checks in gen8 clear/free/unmap.
v6: Use WARN_ON and return -EINVAL in alloc_pt_range (Mika).
v7: Make err_out loop symmetrical to the way we allocate in
alloc_pt_range. Also s/page_tables/page_table and correct commit
message (Mika)
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Michel Thierry <michel.thierry@intel.com> (v3+)
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2015-02-25 00:22:36 +08:00
|
|
|
struct page *page_table;
|
|
|
|
|
|
|
|
if (WARN_ON(!ppgtt->pdp.page_directory[pdpe]))
|
|
|
|
continue;
|
|
|
|
|
|
|
|
pd = ppgtt->pdp.page_directory[pdpe];
|
|
|
|
|
|
|
|
if (WARN_ON(!pd->page_table[pde]))
|
|
|
|
continue;
|
|
|
|
|
|
|
|
pt = pd->page_table[pde];
|
|
|
|
|
|
|
|
if (WARN_ON(!pt->page))
|
|
|
|
continue;
|
|
|
|
|
|
|
|
page_table = pt->page;
|
2013-11-03 12:07:23 +08:00
|
|
|
|
drm/i915/bdw: Reorganize PT allocations
The previous allocation mechanism would get 2 contiguous allocations,
one for the page directories, and one for the page tables. As each page
table is 1 page, and there are 512 of these per page directory, this
goes to 2MB. An unfriendly request at best. Worse still, our HW now
supports 4 page directories, and a 2MB allocation is not allowed.
In order to fix this, this patch attempts to split up each page table
allocation into a single, discrete allocation. There is nothing really
fancy about the patch itself, it just has to manage an extra pointer
indirection, and have a fancier bit of logic to free up the pages.
To accommodate some of the added complexity, two new helpers are
introduced to allocate, and free the page table pages.
NOTE: I really wanted to split the way we do allocations, and the way in
which we identify the page table/page directory being used. I found
splitting this functionality up to be too unwieldy. I apologize in
advance to the reviewer. I'd recommend looking at the result, rather
than the diff.
v2/NOTE2: This patch predated commit:
6f1cc993518462ccf039e195fabd47e7aa5bfd13
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date: Tue Dec 31 15:50:31 2013 +0000
drm/i915: Avoid dereference past end of page arr
It fixed the same issue as that patch, but because of the limbo state of
PPGTT, Chris patch was merged instead. The excess churn is a result of
my using my original patch, which has my preferred naming. Primarily
act_* is changed to which_*, but it's mostly the same otherwise. I've
kept the convention Chris used for the pte wrap (I had something
slightly different, and broken - but fixable)
v3: Rename which_p[..]e to drop which_ (Chris)
Remove BUG_ON in inner loop (Chris)
Redo the pde/pdpe wrap logic (Chris)
v4: s/1MB/2MB in commit message (Imre)
Plug leaking gen8_pt_pages in both the error path, as well as general
free case (Imre)
v5: Rename leftover "which_" variables (Imre)
Add the pde = 0 wrap that was missed from v3 (Imre)
Reviewed-by: Imre Deak <imre.deak@intel.com>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
[danvet: Squash in fixup from Ben.]
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2014-02-21 03:51:21 +08:00
|
|
|
last_pte = pte + num_entries;
|
2015-03-17 00:00:54 +08:00
|
|
|
if (last_pte > GEN8_PTES)
|
|
|
|
last_pte = GEN8_PTES;
|
2013-11-03 12:07:23 +08:00
|
|
|
|
|
|
|
pt_vaddr = kmap_atomic(page_table);
|
|
|
|
|
drm/i915/bdw: Reorganize PT allocations
The previous allocation mechanism would get 2 contiguous allocations,
one for the page directories, and one for the page tables. As each page
table is 1 page, and there are 512 of these per page directory, this
goes to 2MB. An unfriendly request at best. Worse still, our HW now
supports 4 page directories, and a 2MB allocation is not allowed.
In order to fix this, this patch attempts to split up each page table
allocation into a single, discrete allocation. There is nothing really
fancy about the patch itself, it just has to manage an extra pointer
indirection, and have a fancier bit of logic to free up the pages.
To accommodate some of the added complexity, two new helpers are
introduced to allocate, and free the page table pages.
NOTE: I really wanted to split the way we do allocations, and the way in
which we identify the page table/page directory being used. I found
splitting this functionality up to be too unwieldy. I apologize in
advance to the reviewer. I'd recommend looking at the result, rather
than the diff.
v2/NOTE2: This patch predated commit:
6f1cc993518462ccf039e195fabd47e7aa5bfd13
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date: Tue Dec 31 15:50:31 2013 +0000
drm/i915: Avoid dereference past end of page arr
It fixed the same issue as that patch, but because of the limbo state of
PPGTT, Chris patch was merged instead. The excess churn is a result of
my using my original patch, which has my preferred naming. Primarily
act_* is changed to which_*, but it's mostly the same otherwise. I've
kept the convention Chris used for the pte wrap (I had something
slightly different, and broken - but fixable)
v3: Rename which_p[..]e to drop which_ (Chris)
Remove BUG_ON in inner loop (Chris)
Redo the pde/pdpe wrap logic (Chris)
v4: s/1MB/2MB in commit message (Imre)
Plug leaking gen8_pt_pages in both the error path, as well as general
free case (Imre)
v5: Rename leftover "which_" variables (Imre)
Add the pde = 0 wrap that was missed from v3 (Imre)
Reviewed-by: Imre Deak <imre.deak@intel.com>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
[danvet: Squash in fixup from Ben.]
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2014-02-21 03:51:21 +08:00
|
|
|
for (i = pte; i < last_pte; i++) {
|
2013-11-03 12:07:23 +08:00
|
|
|
pt_vaddr[i] = scratch_pte;
|
drm/i915/bdw: Reorganize PT allocations
The previous allocation mechanism would get 2 contiguous allocations,
one for the page directories, and one for the page tables. As each page
table is 1 page, and there are 512 of these per page directory, this
goes to 2MB. An unfriendly request at best. Worse still, our HW now
supports 4 page directories, and a 2MB allocation is not allowed.
In order to fix this, this patch attempts to split up each page table
allocation into a single, discrete allocation. There is nothing really
fancy about the patch itself, it just has to manage an extra pointer
indirection, and have a fancier bit of logic to free up the pages.
To accommodate some of the added complexity, two new helpers are
introduced to allocate, and free the page table pages.
NOTE: I really wanted to split the way we do allocations, and the way in
which we identify the page table/page directory being used. I found
splitting this functionality up to be too unwieldy. I apologize in
advance to the reviewer. I'd recommend looking at the result, rather
than the diff.
v2/NOTE2: This patch predated commit:
6f1cc993518462ccf039e195fabd47e7aa5bfd13
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date: Tue Dec 31 15:50:31 2013 +0000
drm/i915: Avoid dereference past end of page arr
It fixed the same issue as that patch, but because of the limbo state of
PPGTT, Chris patch was merged instead. The excess churn is a result of
my using my original patch, which has my preferred naming. Primarily
act_* is changed to which_*, but it's mostly the same otherwise. I've
kept the convention Chris used for the pte wrap (I had something
slightly different, and broken - but fixable)
v3: Rename which_p[..]e to drop which_ (Chris)
Remove BUG_ON in inner loop (Chris)
Redo the pde/pdpe wrap logic (Chris)
v4: s/1MB/2MB in commit message (Imre)
Plug leaking gen8_pt_pages in both the error path, as well as general
free case (Imre)
v5: Rename leftover "which_" variables (Imre)
Add the pde = 0 wrap that was missed from v3 (Imre)
Reviewed-by: Imre Deak <imre.deak@intel.com>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
[danvet: Squash in fixup from Ben.]
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2014-02-21 03:51:21 +08:00
|
|
|
num_entries--;
|
|
|
|
}
|
2013-11-03 12:07:23 +08:00
|
|
|
|
2014-04-09 18:28:02 +08:00
|
|
|
if (!HAS_LLC(ppgtt->base.dev))
|
|
|
|
drm_clflush_virt_range(pt_vaddr, PAGE_SIZE);
|
2013-11-03 12:07:23 +08:00
|
|
|
kunmap_atomic(pt_vaddr);
|
|
|
|
|
drm/i915/bdw: Reorganize PT allocations
The previous allocation mechanism would get 2 contiguous allocations,
one for the page directories, and one for the page tables. As each page
table is 1 page, and there are 512 of these per page directory, this
goes to 2MB. An unfriendly request at best. Worse still, our HW now
supports 4 page directories, and a 2MB allocation is not allowed.
In order to fix this, this patch attempts to split up each page table
allocation into a single, discrete allocation. There is nothing really
fancy about the patch itself, it just has to manage an extra pointer
indirection, and have a fancier bit of logic to free up the pages.
To accommodate some of the added complexity, two new helpers are
introduced to allocate, and free the page table pages.
NOTE: I really wanted to split the way we do allocations, and the way in
which we identify the page table/page directory being used. I found
splitting this functionality up to be too unwieldy. I apologize in
advance to the reviewer. I'd recommend looking at the result, rather
than the diff.
v2/NOTE2: This patch predated commit:
6f1cc993518462ccf039e195fabd47e7aa5bfd13
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date: Tue Dec 31 15:50:31 2013 +0000
drm/i915: Avoid dereference past end of page arr
It fixed the same issue as that patch, but because of the limbo state of
PPGTT, Chris patch was merged instead. The excess churn is a result of
my using my original patch, which has my preferred naming. Primarily
act_* is changed to which_*, but it's mostly the same otherwise. I've
kept the convention Chris used for the pte wrap (I had something
slightly different, and broken - but fixable)
v3: Rename which_p[..]e to drop which_ (Chris)
Remove BUG_ON in inner loop (Chris)
Redo the pde/pdpe wrap logic (Chris)
v4: s/1MB/2MB in commit message (Imre)
Plug leaking gen8_pt_pages in both the error path, as well as general
free case (Imre)
v5: Rename leftover "which_" variables (Imre)
Add the pde = 0 wrap that was missed from v3 (Imre)
Reviewed-by: Imre Deak <imre.deak@intel.com>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
[danvet: Squash in fixup from Ben.]
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2014-02-21 03:51:21 +08:00
|
|
|
pte = 0;
|
2015-03-17 00:00:54 +08:00
|
|
|
if (++pde == I915_PDES) {
|
drm/i915/bdw: Reorganize PT allocations
The previous allocation mechanism would get 2 contiguous allocations,
one for the page directories, and one for the page tables. As each page
table is 1 page, and there are 512 of these per page directory, this
goes to 2MB. An unfriendly request at best. Worse still, our HW now
supports 4 page directories, and a 2MB allocation is not allowed.
In order to fix this, this patch attempts to split up each page table
allocation into a single, discrete allocation. There is nothing really
fancy about the patch itself, it just has to manage an extra pointer
indirection, and have a fancier bit of logic to free up the pages.
To accommodate some of the added complexity, two new helpers are
introduced to allocate, and free the page table pages.
NOTE: I really wanted to split the way we do allocations, and the way in
which we identify the page table/page directory being used. I found
splitting this functionality up to be too unwieldy. I apologize in
advance to the reviewer. I'd recommend looking at the result, rather
than the diff.
v2/NOTE2: This patch predated commit:
6f1cc993518462ccf039e195fabd47e7aa5bfd13
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date: Tue Dec 31 15:50:31 2013 +0000
drm/i915: Avoid dereference past end of page arr
It fixed the same issue as that patch, but because of the limbo state of
PPGTT, Chris patch was merged instead. The excess churn is a result of
my using my original patch, which has my preferred naming. Primarily
act_* is changed to which_*, but it's mostly the same otherwise. I've
kept the convention Chris used for the pte wrap (I had something
slightly different, and broken - but fixable)
v3: Rename which_p[..]e to drop which_ (Chris)
Remove BUG_ON in inner loop (Chris)
Redo the pde/pdpe wrap logic (Chris)
v4: s/1MB/2MB in commit message (Imre)
Plug leaking gen8_pt_pages in both the error path, as well as general
free case (Imre)
v5: Rename leftover "which_" variables (Imre)
Add the pde = 0 wrap that was missed from v3 (Imre)
Reviewed-by: Imre Deak <imre.deak@intel.com>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
[danvet: Squash in fixup from Ben.]
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2014-02-21 03:51:21 +08:00
|
|
|
pdpe++;
|
|
|
|
pde = 0;
|
|
|
|
}
|
2013-11-03 12:07:23 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2013-11-03 12:07:24 +08:00
|
|
|
static void gen8_ppgtt_insert_entries(struct i915_address_space *vm,
|
|
|
|
struct sg_table *pages,
|
2014-02-21 03:50:33 +08:00
|
|
|
uint64_t start,
|
2014-06-17 13:29:42 +08:00
|
|
|
enum i915_cache_level cache_level, u32 unused)
|
2013-11-03 12:07:24 +08:00
|
|
|
{
|
|
|
|
struct i915_hw_ppgtt *ppgtt =
|
|
|
|
container_of(vm, struct i915_hw_ppgtt, base);
|
2015-03-17 00:00:54 +08:00
|
|
|
gen8_pte_t *pt_vaddr;
|
drm/i915/bdw: Reorganize PT allocations
The previous allocation mechanism would get 2 contiguous allocations,
one for the page directories, and one for the page tables. As each page
table is 1 page, and there are 512 of these per page directory, this
goes to 2MB. An unfriendly request at best. Worse still, our HW now
supports 4 page directories, and a 2MB allocation is not allowed.
In order to fix this, this patch attempts to split up each page table
allocation into a single, discrete allocation. There is nothing really
fancy about the patch itself, it just has to manage an extra pointer
indirection, and have a fancier bit of logic to free up the pages.
To accommodate some of the added complexity, two new helpers are
introduced to allocate, and free the page table pages.
NOTE: I really wanted to split the way we do allocations, and the way in
which we identify the page table/page directory being used. I found
splitting this functionality up to be too unwieldy. I apologize in
advance to the reviewer. I'd recommend looking at the result, rather
than the diff.
v2/NOTE2: This patch predated commit:
6f1cc993518462ccf039e195fabd47e7aa5bfd13
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date: Tue Dec 31 15:50:31 2013 +0000
drm/i915: Avoid dereference past end of page arr
It fixed the same issue as that patch, but because of the limbo state of
PPGTT, Chris patch was merged instead. The excess churn is a result of
my using my original patch, which has my preferred naming. Primarily
act_* is changed to which_*, but it's mostly the same otherwise. I've
kept the convention Chris used for the pte wrap (I had something
slightly different, and broken - but fixable)
v3: Rename which_p[..]e to drop which_ (Chris)
Remove BUG_ON in inner loop (Chris)
Redo the pde/pdpe wrap logic (Chris)
v4: s/1MB/2MB in commit message (Imre)
Plug leaking gen8_pt_pages in both the error path, as well as general
free case (Imre)
v5: Rename leftover "which_" variables (Imre)
Add the pde = 0 wrap that was missed from v3 (Imre)
Reviewed-by: Imre Deak <imre.deak@intel.com>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
[danvet: Squash in fixup from Ben.]
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2014-02-21 03:51:21 +08:00
|
|
|
unsigned pdpe = start >> GEN8_PDPE_SHIFT & GEN8_PDPE_MASK;
|
|
|
|
unsigned pde = start >> GEN8_PDE_SHIFT & GEN8_PDE_MASK;
|
|
|
|
unsigned pte = start >> GEN8_PTE_SHIFT & GEN8_PTE_MASK;
|
2013-11-03 12:07:24 +08:00
|
|
|
struct sg_page_iter sg_iter;
|
|
|
|
|
2013-12-31 23:50:31 +08:00
|
|
|
pt_vaddr = NULL;
|
drm/i915/bdw: Reorganize PT allocations
The previous allocation mechanism would get 2 contiguous allocations,
one for the page directories, and one for the page tables. As each page
table is 1 page, and there are 512 of these per page directory, this
goes to 2MB. An unfriendly request at best. Worse still, our HW now
supports 4 page directories, and a 2MB allocation is not allowed.
In order to fix this, this patch attempts to split up each page table
allocation into a single, discrete allocation. There is nothing really
fancy about the patch itself, it just has to manage an extra pointer
indirection, and have a fancier bit of logic to free up the pages.
To accommodate some of the added complexity, two new helpers are
introduced to allocate, and free the page table pages.
NOTE: I really wanted to split the way we do allocations, and the way in
which we identify the page table/page directory being used. I found
splitting this functionality up to be too unwieldy. I apologize in
advance to the reviewer. I'd recommend looking at the result, rather
than the diff.
v2/NOTE2: This patch predated commit:
6f1cc993518462ccf039e195fabd47e7aa5bfd13
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date: Tue Dec 31 15:50:31 2013 +0000
drm/i915: Avoid dereference past end of page arr
It fixed the same issue as that patch, but because of the limbo state of
PPGTT, Chris patch was merged instead. The excess churn is a result of
my using my original patch, which has my preferred naming. Primarily
act_* is changed to which_*, but it's mostly the same otherwise. I've
kept the convention Chris used for the pte wrap (I had something
slightly different, and broken - but fixable)
v3: Rename which_p[..]e to drop which_ (Chris)
Remove BUG_ON in inner loop (Chris)
Redo the pde/pdpe wrap logic (Chris)
v4: s/1MB/2MB in commit message (Imre)
Plug leaking gen8_pt_pages in both the error path, as well as general
free case (Imre)
v5: Rename leftover "which_" variables (Imre)
Add the pde = 0 wrap that was missed from v3 (Imre)
Reviewed-by: Imre Deak <imre.deak@intel.com>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
[danvet: Squash in fixup from Ben.]
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2014-02-21 03:51:21 +08:00
|
|
|
|
2013-11-03 12:07:24 +08:00
|
|
|
for_each_sg_page(pages->sgl, &sg_iter, pages->nents, 0) {
|
2015-01-23 01:01:24 +08:00
|
|
|
if (WARN_ON(pdpe >= GEN8_LEGACY_PDPES))
|
drm/i915/bdw: Reorganize PT allocations
The previous allocation mechanism would get 2 contiguous allocations,
one for the page directories, and one for the page tables. As each page
table is 1 page, and there are 512 of these per page directory, this
goes to 2MB. An unfriendly request at best. Worse still, our HW now
supports 4 page directories, and a 2MB allocation is not allowed.
In order to fix this, this patch attempts to split up each page table
allocation into a single, discrete allocation. There is nothing really
fancy about the patch itself, it just has to manage an extra pointer
indirection, and have a fancier bit of logic to free up the pages.
To accommodate some of the added complexity, two new helpers are
introduced to allocate, and free the page table pages.
NOTE: I really wanted to split the way we do allocations, and the way in
which we identify the page table/page directory being used. I found
splitting this functionality up to be too unwieldy. I apologize in
advance to the reviewer. I'd recommend looking at the result, rather
than the diff.
v2/NOTE2: This patch predated commit:
6f1cc993518462ccf039e195fabd47e7aa5bfd13
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date: Tue Dec 31 15:50:31 2013 +0000
drm/i915: Avoid dereference past end of page arr
It fixed the same issue as that patch, but because of the limbo state of
PPGTT, Chris patch was merged instead. The excess churn is a result of
my using my original patch, which has my preferred naming. Primarily
act_* is changed to which_*, but it's mostly the same otherwise. I've
kept the convention Chris used for the pte wrap (I had something
slightly different, and broken - but fixable)
v3: Rename which_p[..]e to drop which_ (Chris)
Remove BUG_ON in inner loop (Chris)
Redo the pde/pdpe wrap logic (Chris)
v4: s/1MB/2MB in commit message (Imre)
Plug leaking gen8_pt_pages in both the error path, as well as general
free case (Imre)
v5: Rename leftover "which_" variables (Imre)
Add the pde = 0 wrap that was missed from v3 (Imre)
Reviewed-by: Imre Deak <imre.deak@intel.com>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
[danvet: Squash in fixup from Ben.]
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2014-02-21 03:51:21 +08:00
|
|
|
break;
|
|
|
|
|
2015-02-25 00:22:34 +08:00
|
|
|
if (pt_vaddr == NULL) {
|
2015-04-08 19:13:23 +08:00
|
|
|
struct i915_page_directory *pd = ppgtt->pdp.page_directory[pdpe];
|
|
|
|
struct i915_page_table *pt = pd->page_table[pde];
|
drm/i915: Create page table allocators
As we move toward dynamic page table allocation, it becomes much easier
to manage our data structures if we do things less coarsely by
breaking up all of our actions into individual tasks. This makes the
code easier to write, read, and verify.
Aside from the dissection of the allocation functions, the patch
statically allocates the page table structures without a page directory.
This remains the same for all platforms,
The patch itself should not have much functional difference. The primary
noticeable difference is the fact that page tables are no longer
allocated, but rather statically declared as part of the page directory.
This has non-zero overhead, but things gain additional complexity as a
result.
This patch exists for a few reasons:
1. Splitting out the functions allows easily combining GEN6 and GEN8
code. Page tables have no difference based on GEN8. As we'll see in a
future patch when we add the DMA mappings to the allocations, it
requires only one small change to make work, and error handling should
just fall into place.
2. Unless we always want to allocate all page tables under a given PDE,
we'll have to eventually break this up into an array of pointers (or
pointer to pointer).
3. Having the discrete functions is easier to review, and understand.
All allocations and frees now take place in just a couple of locations.
Reviewing, and catching leaks should be easy.
4. Less important: the GFP flags are confined to one location, which
makes playing around with such things trivial.
v2: Updated commit message to explain why this patch exists
v3: For lrc, s/pdp.page_directory[i].daddr/pdp.page_directory[i]->daddr/
v4: Renamed free_pt/pd_single functions to unmap_and_free_pt/pd (Daniel)
v5: Added additional safety checks in gen8 clear/free/unmap.
v6: Use WARN_ON and return -EINVAL in alloc_pt_range (Mika).
v7: Make err_out loop symmetrical to the way we allocate in
alloc_pt_range. Also s/page_tables/page_table and correct commit
message (Mika)
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Michel Thierry <michel.thierry@intel.com> (v3+)
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2015-02-25 00:22:36 +08:00
|
|
|
struct page *page_table = pt->page;
|
2015-02-25 00:22:34 +08:00
|
|
|
|
|
|
|
pt_vaddr = kmap_atomic(page_table);
|
|
|
|
}
|
2013-11-03 12:07:24 +08:00
|
|
|
|
drm/i915/bdw: Reorganize PT allocations
The previous allocation mechanism would get 2 contiguous allocations,
one for the page directories, and one for the page tables. As each page
table is 1 page, and there are 512 of these per page directory, this
goes to 2MB. An unfriendly request at best. Worse still, our HW now
supports 4 page directories, and a 2MB allocation is not allowed.
In order to fix this, this patch attempts to split up each page table
allocation into a single, discrete allocation. There is nothing really
fancy about the patch itself, it just has to manage an extra pointer
indirection, and have a fancier bit of logic to free up the pages.
To accommodate some of the added complexity, two new helpers are
introduced to allocate, and free the page table pages.
NOTE: I really wanted to split the way we do allocations, and the way in
which we identify the page table/page directory being used. I found
splitting this functionality up to be too unwieldy. I apologize in
advance to the reviewer. I'd recommend looking at the result, rather
than the diff.
v2/NOTE2: This patch predated commit:
6f1cc993518462ccf039e195fabd47e7aa5bfd13
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date: Tue Dec 31 15:50:31 2013 +0000
drm/i915: Avoid dereference past end of page arr
It fixed the same issue as that patch, but because of the limbo state of
PPGTT, Chris' patch was merged instead. The excess churn is a result of
my using my original patch, which has my preferred naming. Primarily
act_* is changed to which_*, but it's mostly the same otherwise. I've
kept the convention Chris used for the pte wrap (I had something
slightly different, and broken - but fixable)
v3: Rename which_p[..]e to drop which_ (Chris)
Remove BUG_ON in inner loop (Chris)
Redo the pde/pdpe wrap logic (Chris)
v4: s/1MB/2MB in commit message (Imre)
Plug leaking gen8_pt_pages in both the error path, as well as general
free case (Imre)
v5: Rename leftover "which_" variables (Imre)
Add the pde = 0 wrap that was missed from v3 (Imre)
Reviewed-by: Imre Deak <imre.deak@intel.com>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
[danvet: Squash in fixup from Ben.]
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2014-02-21 03:51:21 +08:00
|
|
|
pt_vaddr[pte] =
|
2013-12-31 23:50:31 +08:00
|
|
|
gen8_pte_encode(sg_page_iter_dma_address(&sg_iter),
|
|
|
|
cache_level, true);
|
2015-03-17 00:00:54 +08:00
|
|
|
if (++pte == GEN8_PTES) {
|
2014-04-09 18:28:02 +08:00
|
|
|
if (!HAS_LLC(ppgtt->base.dev))
|
|
|
|
drm_clflush_virt_range(pt_vaddr, PAGE_SIZE);
|
2013-11-03 12:07:24 +08:00
|
|
|
kunmap_atomic(pt_vaddr);
|
2013-12-31 23:50:31 +08:00
|
|
|
pt_vaddr = NULL;
|
2015-03-17 00:00:54 +08:00
|
|
|
if (++pde == I915_PDES) {
|
drm/i915/bdw: Reorganize PT allocations
The previous allocation mechanism would get 2 contiguous allocations,
one for the page directories, and one for the page tables. As each page
table is 1 page, and there are 512 of these per page directory, this
goes to 2MB. An unfriendly request at best. Worse still, our HW now
supports 4 page directories, and a 2MB allocation is not allowed.
In order to fix this, this patch attempts to split up each page table
allocation into a single, discrete allocation. There is nothing really
fancy about the patch itself, it just has to manage an extra pointer
indirection, and have a fancier bit of logic to free up the pages.
To accommodate some of the added complexity, two new helpers are
introduced to allocate, and free the page table pages.
NOTE: I really wanted to split the way we do allocations, and the way in
which we identify the page table/page directory being used. I found
splitting this functionality up to be too unwieldy. I apologize in
advance to the reviewer. I'd recommend looking at the result, rather
than the diff.
v2/NOTE2: This patch predated commit:
6f1cc993518462ccf039e195fabd47e7aa5bfd13
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date: Tue Dec 31 15:50:31 2013 +0000
drm/i915: Avoid dereference past end of page arr
It fixed the same issue as that patch, but because of the limbo state of
PPGTT, Chris' patch was merged instead. The excess churn is a result of
my using my original patch, which has my preferred naming. Primarily
act_* is changed to which_*, but it's mostly the same otherwise. I've
kept the convention Chris used for the pte wrap (I had something
slightly different, and broken - but fixable)
v3: Rename which_p[..]e to drop which_ (Chris)
Remove BUG_ON in inner loop (Chris)
Redo the pde/pdpe wrap logic (Chris)
v4: s/1MB/2MB in commit message (Imre)
Plug leaking gen8_pt_pages in both the error path, as well as general
free case (Imre)
v5: Rename leftover "which_" variables (Imre)
Add the pde = 0 wrap that was missed from v3 (Imre)
Reviewed-by: Imre Deak <imre.deak@intel.com>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
[danvet: Squash in fixup from Ben.]
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2014-02-21 03:51:21 +08:00
|
|
|
pdpe++;
|
|
|
|
pde = 0;
|
|
|
|
}
|
|
|
|
pte = 0;
|
2013-11-03 12:07:24 +08:00
|
|
|
}
|
|
|
|
}
|
2014-04-09 18:28:02 +08:00
|
|
|
if (pt_vaddr) {
|
|
|
|
if (!HAS_LLC(ppgtt->base.dev))
|
|
|
|
drm_clflush_virt_range(pt_vaddr, PAGE_SIZE);
|
2013-12-31 23:50:31 +08:00
|
|
|
kunmap_atomic(pt_vaddr);
|
2014-04-09 18:28:02 +08:00
|
|
|
}
|
2013-11-03 12:07:24 +08:00
|
|
|
}
|
|
|
|
|
2015-04-08 19:13:27 +08:00
|
|
|
/*
 * Write one page-directory entry: encode the page table's DMA address
 * (pt->daddr) with I915_CACHE_LLC via gen8_pde_encode() and store the
 * result through @pde.
 */
static void __gen8_do_map_pt(gen8_pde_t * const pde,
			     struct i915_page_table *pt,
			     struct drm_device *dev)
{
	*pde = gen8_pde_encode(dev, pt->daddr, I915_CACHE_LLC);
}
|
|
|
|
|
|
|
|
static void gen8_initialize_pd(struct i915_address_space *vm,
|
|
|
|
struct i915_page_directory *pd)
|
|
|
|
{
|
|
|
|
struct i915_hw_ppgtt *ppgtt =
|
|
|
|
container_of(vm, struct i915_hw_ppgtt, base);
|
|
|
|
gen8_pde_t *page_directory;
|
|
|
|
struct i915_page_table *pt;
|
|
|
|
int i;
|
|
|
|
|
|
|
|
page_directory = kmap_atomic(pd->page);
|
|
|
|
pt = ppgtt->scratch_pt;
|
|
|
|
for (i = 0; i < I915_PDES; i++)
|
|
|
|
/* Map the PDE to the page table */
|
|
|
|
__gen8_do_map_pt(page_directory + i, pt, vm->dev);
|
|
|
|
|
|
|
|
if (!HAS_LLC(vm->dev))
|
|
|
|
drm_clflush_virt_range(page_directory, PAGE_SIZE);
|
2015-04-08 19:13:32 +08:00
|
|
|
kunmap_atomic(page_directory);
|
|
|
|
}
|
|
|
|
|
2015-04-08 19:13:23 +08:00
|
|
|
static void gen8_free_page_tables(struct i915_page_directory *pd, struct drm_device *dev)
|
drm/i915/bdw: Reorganize PT allocations
The previous allocation mechanism would get 2 contiguous allocations,
one for the page directories, and one for the page tables. As each page
table is 1 page, and there are 512 of these per page directory, this
goes to 2MB. An unfriendly request at best. Worse still, our HW now
supports 4 page directories, and a 2MB allocation is not allowed.
In order to fix this, this patch attempts to split up each page table
allocation into a single, discrete allocation. There is nothing really
fancy about the patch itself, it just has to manage an extra pointer
indirection, and have a fancier bit of logic to free up the pages.
To accommodate some of the added complexity, two new helpers are
introduced to allocate, and free the page table pages.
NOTE: I really wanted to split the way we do allocations, and the way in
which we identify the page table/page directory being used. I found
splitting this functionality up to be too unwieldy. I apologize in
advance to the reviewer. I'd recommend looking at the result, rather
than the diff.
v2/NOTE2: This patch predated commit:
6f1cc993518462ccf039e195fabd47e7aa5bfd13
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date: Tue Dec 31 15:50:31 2013 +0000
drm/i915: Avoid dereference past end of page arr
It fixed the same issue as that patch, but because of the limbo state of
PPGTT, Chris patch was merged instead. The excess churn is a result of
my using my original patch, which has my preferred naming. Primarily
act_* is changed to which_*, but it's mostly the same otherwise. I've
kept the convention Chris used for the pte wrap (I had something
slightly different, and broken - but fixable)
v3: Rename which_p[..]e to drop which_ (Chris)
Remove BUG_ON in inner loop (Chris)
Redo the pde/pdpe wrap logic (Chris)
v4: s/1MB/2MB in commit message (Imre)
Plug leaking gen8_pt_pages in both the error path, as well as general
free case (Imre)
v5: Rename leftover "which_" variables (Imre)
Add the pde = 0 wrap that was missed from v3 (Imre)
Reviewed-by: Imre Deak <imre.deak@intel.com>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
[danvet: Squash in fixup from Ben.]
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2014-02-21 03:51:21 +08:00
|
|
|
{
|
|
|
|
int i;
|
|
|
|
|
drm/i915: Create page table allocators
As we move toward dynamic page table allocation, it becomes much easier
to manage our data structures if break do things less coarsely by
breaking up all of our actions into individual tasks. This makes the
code easier to write, read, and verify.
Aside from the dissection of the allocation functions, the patch
statically allocates the page table structures without a page directory.
This remains the same for all platforms,
The patch itself should not have much functional difference. The primary
noticeable difference is the fact that page tables are no longer
allocated, but rather statically declared as part of the page directory.
This has non-zero overhead, but things gain additional complexity as a
result.
This patch exists for a few reasons:
1. Splitting out the functions allows easily combining GEN6 and GEN8
code. Page tables have no difference based on GEN8. As we'll see in a
future patch when we add the DMA mappings to the allocations, it
requires only one small change to make work, and error handling should
just fall into place.
2. Unless we always want to allocate all page tables under a given PDE,
we'll have to eventually break this up into an array of pointers (or
pointer to pointer).
3. Having the discrete functions is easier to review, and understand.
All allocations and frees now take place in just a couple of locations.
Reviewing, and catching leaks should be easy.
4. Less important: the GFP flags are confined to one location, which
makes playing around with such things trivial.
v2: Updated commit message to explain why this patch exists
v3: For lrc, s/pdp.page_directory[i].daddr/pdp.page_directory[i]->daddr/
v4: Renamed free_pt/pd_single functions to unmap_and_free_pt/pd (Daniel)
v5: Added additional safety checks in gen8 clear/free/unmap.
v6: Use WARN_ON and return -EINVAL in alloc_pt_range (Mika).
v7: Make err_out loop symmetrical to the way we allocate in
alloc_pt_range. Also s/page_tables/page_table and correct commit
message (Mika)
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Michel Thierry <michel.thierry@intel.com> (v3+)
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2015-02-25 00:22:36 +08:00
|
|
|
if (!pd->page)
|
drm/i915/bdw: Reorganize PT allocations
The previous allocation mechanism would get 2 contiguous allocations,
one for the page directories, and one for the page tables. As each page
table is 1 page, and there are 512 of these per page directory, this
goes to 2MB. An unfriendly request at best. Worse still, our HW now
supports 4 page directories, and a 2MB allocation is not allowed.
In order to fix this, this patch attempts to split up each page table
allocation into a single, discrete allocation. There is nothing really
fancy about the patch itself, it just has to manage an extra pointer
indirection, and have a fancier bit of logic to free up the pages.
To accommodate some of the added complexity, two new helpers are
introduced to allocate, and free the page table pages.
NOTE: I really wanted to split the way we do allocations, and the way in
which we identify the page table/page directory being used. I found
splitting this functionality up to be too unwieldy. I apologize in
advance to the reviewer. I'd recommend looking at the result, rather
than the diff.
v2/NOTE2: This patch predated commit:
6f1cc993518462ccf039e195fabd47e7aa5bfd13
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date: Tue Dec 31 15:50:31 2013 +0000
drm/i915: Avoid dereference past end of page arr
It fixed the same issue as that patch, but because of the limbo state of
PPGTT, Chris patch was merged instead. The excess churn is a result of
my using my original patch, which has my preferred naming. Primarily
act_* is changed to which_*, but it's mostly the same otherwise. I've
kept the convention Chris used for the pte wrap (I had something
slightly different, and broken - but fixable)
v3: Rename which_p[..]e to drop which_ (Chris)
Remove BUG_ON in inner loop (Chris)
Redo the pde/pdpe wrap logic (Chris)
v4: s/1MB/2MB in commit message (Imre)
Plug leaking gen8_pt_pages in both the error path, as well as general
free case (Imre)
v5: Rename leftover "which_" variables (Imre)
Add the pde = 0 wrap that was missed from v3 (Imre)
Reviewed-by: Imre Deak <imre.deak@intel.com>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
[danvet: Squash in fixup from Ben.]
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2014-02-21 03:51:21 +08:00
|
|
|
return;
|
|
|
|
|
2015-04-08 19:13:33 +08:00
|
|
|
for_each_set_bit(i, pd->used_pdes, I915_PDES) {
|
drm/i915: Create page table allocators
As we move toward dynamic page table allocation, it becomes much easier
to manage our data structures if break do things less coarsely by
breaking up all of our actions into individual tasks. This makes the
code easier to write, read, and verify.
Aside from the dissection of the allocation functions, the patch
statically allocates the page table structures without a page directory.
This remains the same for all platforms,
The patch itself should not have much functional difference. The primary
noticeable difference is the fact that page tables are no longer
allocated, but rather statically declared as part of the page directory.
This has non-zero overhead, but things gain additional complexity as a
result.
This patch exists for a few reasons:
1. Splitting out the functions allows easily combining GEN6 and GEN8
code. Page tables have no difference based on GEN8. As we'll see in a
future patch when we add the DMA mappings to the allocations, it
requires only one small change to make work, and error handling should
just fall into place.
2. Unless we always want to allocate all page tables under a given PDE,
we'll have to eventually break this up into an array of pointers (or
pointer to pointer).
3. Having the discrete functions is easier to review, and understand.
All allocations and frees now take place in just a couple of locations.
Reviewing, and catching leaks should be easy.
4. Less important: the GFP flags are confined to one location, which
makes playing around with such things trivial.
v2: Updated commit message to explain why this patch exists
v3: For lrc, s/pdp.page_directory[i].daddr/pdp.page_directory[i]->daddr/
v4: Renamed free_pt/pd_single functions to unmap_and_free_pt/pd (Daniel)
v5: Added additional safety checks in gen8 clear/free/unmap.
v6: Use WARN_ON and return -EINVAL in alloc_pt_range (Mika).
v7: Make err_out loop symmetrical to the way we allocate in
alloc_pt_range. Also s/page_tables/page_table and correct commit
message (Mika)
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Michel Thierry <michel.thierry@intel.com> (v3+)
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2015-02-25 00:22:36 +08:00
|
|
|
if (WARN_ON(!pd->page_table[i]))
|
|
|
|
continue;
|
drm/i915/bdw: Reorganize PT allocations
The previous allocation mechanism would get 2 contiguous allocations,
one for the page directories, and one for the page tables. As each page
table is 1 page, and there are 512 of these per page directory, this
goes to 2MB. An unfriendly request at best. Worse still, our HW now
supports 4 page directories, and a 2MB allocation is not allowed.
In order to fix this, this patch attempts to split up each page table
allocation into a single, discrete allocation. There is nothing really
fancy about the patch itself, it just has to manage an extra pointer
indirection, and have a fancier bit of logic to free up the pages.
To accommodate some of the added complexity, two new helpers are
introduced to allocate, and free the page table pages.
NOTE: I really wanted to split the way we do allocations, and the way in
which we identify the page table/page directory being used. I found
splitting this functionality up to be too unwieldy. I apologize in
advance to the reviewer. I'd recommend looking at the result, rather
than the diff.
v2/NOTE2: This patch predated commit:
6f1cc993518462ccf039e195fabd47e7aa5bfd13
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date: Tue Dec 31 15:50:31 2013 +0000
drm/i915: Avoid dereference past end of page arr
It fixed the same issue as that patch, but because of the limbo state of
PPGTT, Chris patch was merged instead. The excess churn is a result of
my using my original patch, which has my preferred naming. Primarily
act_* is changed to which_*, but it's mostly the same otherwise. I've
kept the convention Chris used for the pte wrap (I had something
slightly different, and broken - but fixable)
v3: Rename which_p[..]e to drop which_ (Chris)
Remove BUG_ON in inner loop (Chris)
Redo the pde/pdpe wrap logic (Chris)
v4: s/1MB/2MB in commit message (Imre)
Plug leaking gen8_pt_pages in both the error path, as well as general
free case (Imre)
v5: Rename leftover "which_" variables (Imre)
Add the pde = 0 wrap that was missed from v3 (Imre)
Reviewed-by: Imre Deak <imre.deak@intel.com>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
[danvet: Squash in fixup from Ben.]
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2014-02-21 03:51:21 +08:00
|
|
|
|
2015-02-25 00:22:37 +08:00
|
|
|
unmap_and_free_pt(pd->page_table[i], dev);
|
drm/i915: Create page table allocators
As we move toward dynamic page table allocation, it becomes much easier
to manage our data structures if break do things less coarsely by
breaking up all of our actions into individual tasks. This makes the
code easier to write, read, and verify.
Aside from the dissection of the allocation functions, the patch
statically allocates the page table structures without a page directory.
This remains the same for all platforms,
The patch itself should not have much functional difference. The primary
noticeable difference is the fact that page tables are no longer
allocated, but rather statically declared as part of the page directory.
This has non-zero overhead, but things gain additional complexity as a
result.
This patch exists for a few reasons:
1. Splitting out the functions allows easily combining GEN6 and GEN8
code. Page tables have no difference based on GEN8. As we'll see in a
future patch when we add the DMA mappings to the allocations, it
requires only one small change to make work, and error handling should
just fall into place.
2. Unless we always want to allocate all page tables under a given PDE,
we'll have to eventually break this up into an array of pointers (or
pointer to pointer).
3. Having the discrete functions is easier to review, and understand.
All allocations and frees now take place in just a couple of locations.
Reviewing, and catching leaks should be easy.
4. Less important: the GFP flags are confined to one location, which
makes playing around with such things trivial.
v2: Updated commit message to explain why this patch exists
v3: For lrc, s/pdp.page_directory[i].daddr/pdp.page_directory[i]->daddr/
v4: Renamed free_pt/pd_single functions to unmap_and_free_pt/pd (Daniel)
v5: Added additional safety checks in gen8 clear/free/unmap.
v6: Use WARN_ON and return -EINVAL in alloc_pt_range (Mika).
v7: Make err_out loop symmetrical to the way we allocate in
alloc_pt_range. Also s/page_tables/page_table and correct commit
message (Mika)
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Michel Thierry <michel.thierry@intel.com> (v3+)
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2015-02-25 00:22:36 +08:00
|
|
|
pd->page_table[i] = NULL;
|
|
|
|
}
|
2015-02-25 00:22:34 +08:00
|
|
|
}
|
|
|
|
|
2015-04-14 23:35:13 +08:00
|
|
|
static void gen8_ppgtt_cleanup(struct i915_address_space *vm)
|
2014-02-13 06:28:44 +08:00
|
|
|
{
|
2015-04-14 23:35:13 +08:00
|
|
|
struct i915_hw_ppgtt *ppgtt =
|
|
|
|
container_of(vm, struct i915_hw_ppgtt, base);
|
2014-02-13 06:28:44 +08:00
|
|
|
int i;
|
|
|
|
|
2015-04-08 19:13:33 +08:00
|
|
|
for_each_set_bit(i, ppgtt->pdp.used_pdpes, GEN8_LEGACY_PDPES) {
|
drm/i915: Create page table allocators
As we move toward dynamic page table allocation, it becomes much easier
to manage our data structures if break do things less coarsely by
breaking up all of our actions into individual tasks. This makes the
code easier to write, read, and verify.
Aside from the dissection of the allocation functions, the patch
statically allocates the page table structures without a page directory.
This remains the same for all platforms,
The patch itself should not have much functional difference. The primary
noticeable difference is the fact that page tables are no longer
allocated, but rather statically declared as part of the page directory.
This has non-zero overhead, but things gain additional complexity as a
result.
This patch exists for a few reasons:
1. Splitting out the functions allows easily combining GEN6 and GEN8
code. Page tables have no difference based on GEN8. As we'll see in a
future patch when we add the DMA mappings to the allocations, it
requires only one small change to make work, and error handling should
just fall into place.
2. Unless we always want to allocate all page tables under a given PDE,
we'll have to eventually break this up into an array of pointers (or
pointer to pointer).
3. Having the discrete functions is easier to review, and understand.
All allocations and frees now take place in just a couple of locations.
Reviewing, and catching leaks should be easy.
4. Less important: the GFP flags are confined to one location, which
makes playing around with such things trivial.
v2: Updated commit message to explain why this patch exists
v3: For lrc, s/pdp.page_directory[i].daddr/pdp.page_directory[i]->daddr/
v4: Renamed free_pt/pd_single functions to unmap_and_free_pt/pd (Daniel)
v5: Added additional safety checks in gen8 clear/free/unmap.
v6: Use WARN_ON and return -EINVAL in alloc_pt_range (Mika).
v7: Make err_out loop symmetrical to the way we allocate in
alloc_pt_range. Also s/page_tables/page_table and correct commit
message (Mika)
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Michel Thierry <michel.thierry@intel.com> (v3+)
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2015-02-25 00:22:36 +08:00
|
|
|
if (WARN_ON(!ppgtt->pdp.page_directory[i]))
|
|
|
|
continue;
|
|
|
|
|
2015-02-25 00:22:37 +08:00
|
|
|
gen8_free_page_tables(ppgtt->pdp.page_directory[i], ppgtt->base.dev);
|
2015-04-08 19:13:32 +08:00
|
|
|
unmap_and_free_pd(ppgtt->pdp.page_directory[i], ppgtt->base.dev);
|
drm/i915/bdw: Reorganize PT allocations
The previous allocation mechanism would get 2 contiguous allocations,
one for the page directories, and one for the page tables. As each page
table is 1 page, and there are 512 of these per page directory, this
goes to 2MB. An unfriendly request at best. Worse still, our HW now
supports 4 page directories, and a 2MB allocation is not allowed.
In order to fix this, this patch attempts to split up each page table
allocation into a single, discrete allocation. There is nothing really
fancy about the patch itself, it just has to manage an extra pointer
indirection, and have a fancier bit of logic to free up the pages.
To accommodate some of the added complexity, two new helpers are
introduced to allocate, and free the page table pages.
NOTE: I really wanted to split the way we do allocations, and the way in
which we identify the page table/page directory being used. I found
splitting this functionality up to be too unwieldy. I apologize in
advance to the reviewer. I'd recommend looking at the result, rather
than the diff.
v2/NOTE2: This patch predated commit:
6f1cc993518462ccf039e195fabd47e7aa5bfd13
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date: Tue Dec 31 15:50:31 2013 +0000
drm/i915: Avoid dereference past end of page arr
It fixed the same issue as that patch, but because of the limbo state of
PPGTT, Chris patch was merged instead. The excess churn is a result of
my using my original patch, which has my preferred naming. Primarily
act_* is changed to which_*, but it's mostly the same otherwise. I've
kept the convention Chris used for the pte wrap (I had something
slightly different, and broken - but fixable)
v3: Rename which_p[..]e to drop which_ (Chris)
Remove BUG_ON in inner loop (Chris)
Redo the pde/pdpe wrap logic (Chris)
v4: s/1MB/2MB in commit message (Imre)
Plug leaking gen8_pt_pages in both the error path, as well as general
free case (Imre)
v5: Rename leftover "which_" variables (Imre)
Add the pde = 0 wrap that was missed from v3 (Imre)
Reviewed-by: Imre Deak <imre.deak@intel.com>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
[danvet: Squash in fixup from Ben.]
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2014-02-21 03:51:21 +08:00
|
|
|
}
|
2015-04-08 19:13:27 +08:00
|
|
|
|
2015-04-08 19:13:32 +08:00
|
|
|
unmap_and_free_pd(ppgtt->scratch_pd, ppgtt->base.dev);
|
2015-04-08 19:13:27 +08:00
|
|
|
unmap_and_free_pt(ppgtt->scratch_pt, ppgtt->base.dev);
|
2014-02-13 06:28:44 +08:00
|
|
|
}
|
|
|
|
|
drm/i915/gen8: Dynamic page table allocations
This finishes off the dynamic page tables allocations, in the legacy 3
level style that already exists. Most everything has already been setup
to this point, the patch finishes off the enabling by setting the
appropriate function pointers.
In LRC mode, contexts need to know the PDPs when they are populated. With
dynamic page table allocations, these PDPs may not exist yet. Check if
PDPs have been allocated and use the scratch page if they do not exist yet.
Before submission, update the PDPs in the logic ring context as PDPs
have been allocated.
v2: Update aliasing/true ppgtt allocate/teardown/clear functions for
gen 6 & 7.
v3: Rebase.
v4: Remove BUG() from ppgtt_unbind_vma, but keep checking that either
teardown_va_range or clear_range functions exist (Daniel).
v5: Similar to gen6, in init, gen8_ppgtt_clear_range call is only needed
for aliasing ppgtt. Zombie tracking was originally added for teardown
function and is no longer required.
v6: Update err_out case in gen8_alloc_va_range (missed from latest
rebase).
v7: Rebase after s/page_tables/page_table/.
v8: Updated scratch_pt check after scratch flag was removed in previous
patch.
v9: Note that lrc mode needs to be updated to support init state without
any PDP.
v10: Unmap correct page_table in gen8_alloc_va_range's error case, clean-up
gen8_aliasing_ppgtt_init (remove duplicated map), and initialize PTs
during page table allocation.
v11: Squashed LRC enabling commit, otherwise LRC mode would be left broken
until it was updated to handle the init case without any PDP.
v12: Do not overallocate new_pts bitmap, make alloc_gen8_temp_bitmaps
static and don't abuse inline functions. (Mika)
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Michel Thierry <michel.thierry@intel.com> (v2+)
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2015-04-08 19:13:34 +08:00
|
|
|
/**
|
|
|
|
* gen8_ppgtt_alloc_pagetabs() - Allocate page tables for VA range.
|
|
|
|
* @ppgtt: Master ppgtt structure.
|
|
|
|
* @pd: Page directory for this address range.
|
|
|
|
* @start: Starting virtual address to begin allocations.
|
|
|
|
* @length: Size of the allocations.
|
|
|
|
* @new_pts: Bitmap set by function with new allocations. Likely used by the
|
|
|
|
* caller to free on error.
|
|
|
|
*
|
|
|
|
* Allocate the required number of page tables. Extremely similar to
|
|
|
|
* gen8_ppgtt_alloc_page_directories(). The main difference is here we are limited by
|
|
|
|
* the page directory boundary (instead of the page directory pointer). That
|
|
|
|
* boundary is 1GB virtual. Therefore, unlike gen8_ppgtt_alloc_page_directories(), it is
|
|
|
|
* possible, and likely that the caller will need to use multiple calls of this
|
|
|
|
* function to achieve the appropriate allocation.
|
|
|
|
*
|
|
|
|
* Return: 0 if success; negative error code otherwise.
|
|
|
|
*/
|
2015-04-08 19:13:32 +08:00
|
|
|
static int gen8_ppgtt_alloc_pagetabs(struct i915_hw_ppgtt *ppgtt,
|
|
|
|
struct i915_page_directory *pd,
|
2015-04-08 19:13:28 +08:00
|
|
|
uint64_t start,
|
drm/i915/gen8: Dynamic page table allocations
This finishes off the dynamic page tables allocations, in the legacy 3
level style that already exists. Most everything has already been setup
to this point, the patch finishes off the enabling by setting the
appropriate function pointers.
In LRC mode, contexts need to know the PDPs when they are populated. With
dynamic page table allocations, these PDPs may not exist yet. Check if
PDPs have been allocated and use the scratch page if they do not exist yet.
Before submission, update the PDPs in the logic ring context as PDPs
have been allocated.
v2: Update aliasing/true ppgtt allocate/teardown/clear functions for
gen 6 & 7.
v3: Rebase.
v4: Remove BUG() from ppgtt_unbind_vma, but keep checking that either
teardown_va_range or clear_range functions exist (Daniel).
v5: Similar to gen6, in init, gen8_ppgtt_clear_range call is only needed
for aliasing ppgtt. Zombie tracking was originally added for teardown
function and is no longer required.
v6: Update err_out case in gen8_alloc_va_range (missed from latest
rebase).
v7: Rebase after s/page_tables/page_table/.
v8: Updated scratch_pt check after scratch flag was removed in previous
patch.
v9: Note that lrc mode needs to be updated to support init state without
any PDP.
v10: Unmap correct page_table in gen8_alloc_va_range's error case, clean-up
gen8_aliasing_ppgtt_init (remove duplicated map), and initialize PTs
during page table allocation.
v11: Squashed LRC enabling commit, otherwise LRC mode would be left broken
until it was updated to handle the init case without any PDP.
v12: Do not overallocate new_pts bitmap, make alloc_gen8_temp_bitmaps
static and don't abuse inline functions. (Mika)
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Michel Thierry <michel.thierry@intel.com> (v2+)
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2015-04-08 19:13:34 +08:00
|
|
|
uint64_t length,
|
|
|
|
unsigned long *new_pts)
|
2014-02-20 14:05:43 +08:00
|
|
|
{
|
2015-04-08 19:13:32 +08:00
|
|
|
struct drm_device *dev = ppgtt->base.dev;
|
drm/i915/gen8: Dynamic page table allocations
This finishes off the dynamic page tables allocations, in the legacy 3
level style that already exists. Most everything has already been setup
to this point, the patch finishes off the enabling by setting the
appropriate function pointers.
In LRC mode, contexts need to know the PDPs when they are populated. With
dynamic page table allocations, these PDPs may not exist yet. Check if
PDPs have been allocated and use the scratch page if they do not exist yet.
Before submission, update the PDPs in the logic ring context as PDPs
have been allocated.
v2: Update aliasing/true ppgtt allocate/teardown/clear functions for
gen 6 & 7.
v3: Rebase.
v4: Remove BUG() from ppgtt_unbind_vma, but keep checking that either
teardown_va_range or clear_range functions exist (Daniel).
v5: Similar to gen6, in init, gen8_ppgtt_clear_range call is only needed
for aliasing ppgtt. Zombie tracking was originally added for teardown
function and is no longer required.
v6: Update err_out case in gen8_alloc_va_range (missed from latest
rebase).
v7: Rebase after s/page_tables/page_table/.
v8: Updated scratch_pt check after scratch flag was removed in previous
patch.
v9: Note that lrc mode needs to be updated to support init state without
any PDP.
v10: Unmap correct page_table in gen8_alloc_va_range's error case, clean-up
gen8_aliasing_ppgtt_init (remove duplicated map), and initialize PTs
during page table allocation.
v11: Squashed LRC enabling commit, otherwise LRC mode would be left broken
until it was updated to handle the init case without any PDP.
v12: Do not overallocate new_pts bitmap, make alloc_gen8_temp_bitmaps
static and don't abuse of inline functions. (Mika)
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Michel Thierry <michel.thierry@intel.com> (v2+)
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2015-04-08 19:13:34 +08:00
|
|
|
struct i915_page_table *pt;
|
2015-04-08 19:13:28 +08:00
|
|
|
uint64_t temp;
|
|
|
|
uint32_t pde;
|
2014-02-20 14:05:43 +08:00
|
|
|
|
drm/i915/gen8: Dynamic page table allocations
This finishes off the dynamic page tables allocations, in the legacy 3
level style that already exists. Most everything has already been setup
to this point, the patch finishes off the enabling by setting the
appropriate function pointers.
In LRC mode, contexts need to know the PDPs when they are populated. With
dynamic page table allocations, these PDPs may not exist yet. Check if
PDPs have been allocated and use the scratch page if they do not exist yet.
Before submission, update the PDPs in the logic ring context as PDPs
have been allocated.
v2: Update aliasing/true ppgtt allocate/teardown/clear functions for
gen 6 & 7.
v3: Rebase.
v4: Remove BUG() from ppgtt_unbind_vma, but keep checking that either
teardown_va_range or clear_range functions exist (Daniel).
v5: Similar to gen6, in init, gen8_ppgtt_clear_range call is only needed
for aliasing ppgtt. Zombie tracking was originally added for teardown
function and is no longer required.
v6: Update err_out case in gen8_alloc_va_range (missed from latest
rebase).
v7: Rebase after s/page_tables/page_table/.
v8: Updated scratch_pt check after scratch flag was removed in previous
patch.
v9: Note that lrc mode needs to be updated to support init state without
any PDP.
v10: Unmap correct page_table in gen8_alloc_va_range's error case, clean-up
gen8_aliasing_ppgtt_init (remove duplicated map), and initialize PTs
during page table allocation.
v11: Squashed LRC enabling commit, otherwise LRC mode would be left broken
until it was updated to handle the init case without any PDP.
v12: Do not overallocate new_pts bitmap, make alloc_gen8_temp_bitmaps
static and don't abuse of inline functions. (Mika)
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Michel Thierry <michel.thierry@intel.com> (v2+)
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2015-04-08 19:13:34 +08:00
|
|
|
gen8_for_each_pde(pt, pd, start, length, temp, pde) {
|
|
|
|
/* Don't reallocate page tables */
|
|
|
|
if (pt) {
|
|
|
|
/* Scratch is never allocated this way */
|
|
|
|
WARN_ON(pt == ppgtt->scratch_pt);
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
pt = alloc_pt_single(dev);
|
|
|
|
if (IS_ERR(pt))
|
2015-04-08 19:13:28 +08:00
|
|
|
goto unwind_out;
|
|
|
|
|
drm/i915/gen8: Dynamic page table allocations
This finishes off the dynamic page tables allocations, in the legacy 3
level style that already exists. Most everything has already been setup
to this point, the patch finishes off the enabling by setting the
appropriate function pointers.
In LRC mode, contexts need to know the PDPs when they are populated. With
dynamic page table allocations, these PDPs may not exist yet. Check if
PDPs have been allocated and use the scratch page if they do not exist yet.
Before submission, update the PDPs in the logic ring context as PDPs
have been allocated.
v2: Update aliasing/true ppgtt allocate/teardown/clear functions for
gen 6 & 7.
v3: Rebase.
v4: Remove BUG() from ppgtt_unbind_vma, but keep checking that either
teardown_va_range or clear_range functions exist (Daniel).
v5: Similar to gen6, in init, gen8_ppgtt_clear_range call is only needed
for aliasing ppgtt. Zombie tracking was originally added for teardown
function and is no longer required.
v6: Update err_out case in gen8_alloc_va_range (missed from latest
rebase).
v7: Rebase after s/page_tables/page_table/.
v8: Updated scratch_pt check after scratch flag was removed in previous
patch.
v9: Note that lrc mode needs to be updated to support init state without
any PDP.
v10: Unmap correct page_table in gen8_alloc_va_range's error case, clean-up
gen8_aliasing_ppgtt_init (remove duplicated map), and initialize PTs
during page table allocation.
v11: Squashed LRC enabling commit, otherwise LRC mode would be left broken
until it was updated to handle the init case without any PDP.
v12: Do not overallocate new_pts bitmap, make alloc_gen8_temp_bitmaps
static and don't abuse of inline functions. (Mika)
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Michel Thierry <michel.thierry@intel.com> (v2+)
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2015-04-08 19:13:34 +08:00
|
|
|
gen8_initialize_pt(&ppgtt->base, pt);
|
|
|
|
pd->page_table[pde] = pt;
|
|
|
|
set_bit(pde, new_pts);
|
drm/i915/bdw: Reorganize PT allocations
The previous allocation mechanism would get 2 contiguous allocations,
one for the page directories, and one for the page tables. As each page
table is 1 page, and there are 512 of these per page directory, this
goes to 2MB. An unfriendly request at best. Worse still, our HW now
supports 4 page directories, and a 2MB allocation is not allowed.
In order to fix this, this patch attempts to split up each page table
allocation into a single, discrete allocation. There is nothing really
fancy about the patch itself, it just has to manage an extra pointer
indirection, and have a fancier bit of logic to free up the pages.
To accommodate some of the added complexity, two new helpers are
introduced to allocate, and free the page table pages.
NOTE: I really wanted to split the way we do allocations, and the way in
which we identify the page table/page directory being used. I found
splitting this functionality up to be too unwieldy. I apologize in
advance to the reviewer. I'd recommend looking at the result, rather
than the diff.
v2/NOTE2: This patch predated commit:
6f1cc993518462ccf039e195fabd47e7aa5bfd13
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date: Tue Dec 31 15:50:31 2013 +0000
drm/i915: Avoid dereference past end of page arr
It fixed the same issue as that patch, but because of the limbo state of
PPGTT, Chris patch was merged instead. The excess churn is a result of
my using my original patch, which has my preferred naming. Primarily
act_* is changed to which_*, but it's mostly the same otherwise. I've
kept the convention Chris used for the pte wrap (I had something
slightly different, and broken - but fixable)
v3: Rename which_p[..]e to drop which_ (Chris)
Remove BUG_ON in inner loop (Chris)
Redo the pde/pdpe wrap logic (Chris)
v4: s/1MB/2MB in commit message (Imre)
Plug leaking gen8_pt_pages in both the error path, as well as general
free case (Imre)
v5: Rename leftover "which_" variables (Imre)
Add the pde = 0 wrap that was missed from v3 (Imre)
Reviewed-by: Imre Deak <imre.deak@intel.com>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
[danvet: Squash in fixup from Ben.]
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2014-02-21 03:51:21 +08:00
|
|
|
}
|
|
|
|
|
2014-02-20 14:05:43 +08:00
|
|
|
return 0;
|
drm/i915/bdw: Reorganize PT allocations
The previous allocation mechanism would get 2 contiguous allocations,
one for the page directories, and one for the page tables. As each page
table is 1 page, and there are 512 of these per page directory, this
goes to 2MB. An unfriendly request at best. Worse still, our HW now
supports 4 page directories, and a 2MB allocation is not allowed.
In order to fix this, this patch attempts to split up each page table
allocation into a single, discrete allocation. There is nothing really
fancy about the patch itself, it just has to manage an extra pointer
indirection, and have a fancier bit of logic to free up the pages.
To accommodate some of the added complexity, two new helpers are
introduced to allocate, and free the page table pages.
NOTE: I really wanted to split the way we do allocations, and the way in
which we identify the page table/page directory being used. I found
splitting this functionality up to be too unwieldy. I apologize in
advance to the reviewer. I'd recommend looking at the result, rather
than the diff.
v2/NOTE2: This patch predated commit:
6f1cc993518462ccf039e195fabd47e7aa5bfd13
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date: Tue Dec 31 15:50:31 2013 +0000
drm/i915: Avoid dereference past end of page arr
It fixed the same issue as that patch, but because of the limbo state of
PPGTT, Chris patch was merged instead. The excess churn is a result of
my using my original patch, which has my preferred naming. Primarily
act_* is changed to which_*, but it's mostly the same otherwise. I've
kept the convention Chris used for the pte wrap (I had something
slightly different, and broken - but fixable)
v3: Rename which_p[..]e to drop which_ (Chris)
Remove BUG_ON in inner loop (Chris)
Redo the pde/pdpe wrap logic (Chris)
v4: s/1MB/2MB in commit message (Imre)
Plug leaking gen8_pt_pages in both the error path, as well as general
free case (Imre)
v5: Rename leftover "which_" variables (Imre)
Add the pde = 0 wrap that was missed from v3 (Imre)
Reviewed-by: Imre Deak <imre.deak@intel.com>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
[danvet: Squash in fixup from Ben.]
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2014-02-21 03:51:21 +08:00
|
|
|
|
|
|
|
unwind_out:
|
drm/i915/gen8: Dynamic page table allocations
This finishes off the dynamic page tables allocations, in the legacy 3
level style that already exists. Most everything has already been setup
to this point, the patch finishes off the enabling by setting the
appropriate function pointers.
In LRC mode, contexts need to know the PDPs when they are populated. With
dynamic page table allocations, these PDPs may not exist yet. Check if
PDPs have been allocated and use the scratch page if they do not exist yet.
Before submission, update the PDPs in the logic ring context as PDPs
have been allocated.
v2: Update aliasing/true ppgtt allocate/teardown/clear functions for
gen 6 & 7.
v3: Rebase.
v4: Remove BUG() from ppgtt_unbind_vma, but keep checking that either
teardown_va_range or clear_range functions exist (Daniel).
v5: Similar to gen6, in init, gen8_ppgtt_clear_range call is only needed
for aliasing ppgtt. Zombie tracking was originally added for teardown
function and is no longer required.
v6: Update err_out case in gen8_alloc_va_range (missed from latest
rebase).
v7: Rebase after s/page_tables/page_table/.
v8: Updated scratch_pt check after scratch flag was removed in previous
patch.
v9: Note that lrc mode needs to be updated to support init state without
any PDP.
v10: Unmap correct page_table in gen8_alloc_va_range's error case, clean-up
gen8_aliasing_ppgtt_init (remove duplicated map), and initialize PTs
during page table allocation.
v11: Squashed LRC enabling commit, otherwise LRC mode would be left broken
until it was updated to handle the init case without any PDP.
v12: Do not overallocate new_pts bitmap, make alloc_gen8_temp_bitmaps
static and don't abuse of inline functions. (Mika)
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Michel Thierry <michel.thierry@intel.com> (v2+)
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2015-04-08 19:13:34 +08:00
|
|
|
for_each_set_bit(pde, new_pts, I915_PDES)
|
2015-04-08 19:13:32 +08:00
|
|
|
unmap_and_free_pt(pd->page_table[pde], dev);
|
drm/i915/bdw: Reorganize PT allocations
The previous allocation mechanism would get 2 contiguous allocations,
one for the page directories, and one for the page tables. As each page
table is 1 page, and there are 512 of these per page directory, this
goes to 2MB. An unfriendly request at best. Worse still, our HW now
supports 4 page directories, and a 2MB allocation is not allowed.
In order to fix this, this patch attempts to split up each page table
allocation into a single, discrete allocation. There is nothing really
fancy about the patch itself, it just has to manage an extra pointer
indirection, and have a fancier bit of logic to free up the pages.
To accommodate some of the added complexity, two new helpers are
introduced to allocate, and free the page table pages.
NOTE: I really wanted to split the way we do allocations, and the way in
which we identify the page table/page directory being used. I found
splitting this functionality up to be too unwieldy. I apologize in
advance to the reviewer. I'd recommend looking at the result, rather
than the diff.
v2/NOTE2: This patch predated commit:
6f1cc993518462ccf039e195fabd47e7aa5bfd13
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date: Tue Dec 31 15:50:31 2013 +0000
drm/i915: Avoid dereference past end of page arr
It fixed the same issue as that patch, but because of the limbo state of
PPGTT, Chris patch was merged instead. The excess churn is a result of
my using my original patch, which has my preferred naming. Primarily
act_* is changed to which_*, but it's mostly the same otherwise. I've
kept the convention Chris used for the pte wrap (I had something
slightly different, and broken - but fixable)
v3: Rename which_p[..]e to drop which_ (Chris)
Remove BUG_ON in inner loop (Chris)
Redo the pde/pdpe wrap logic (Chris)
v4: s/1MB/2MB in commit message (Imre)
Plug leaking gen8_pt_pages in both the error path, as well as general
free case (Imre)
v5: Rename leftover "which_" variables (Imre)
Add the pde = 0 wrap that was missed from v3 (Imre)
Reviewed-by: Imre Deak <imre.deak@intel.com>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
[danvet: Squash in fixup from Ben.]
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2014-02-21 03:51:21 +08:00
|
|
|
|
2015-02-25 00:22:34 +08:00
|
|
|
return -ENOMEM;
|
2014-02-20 14:05:43 +08:00
|
|
|
}
|
|
|
|
|
drm/i915/gen8: Dynamic page table allocations
This finishes off the dynamic page tables allocations, in the legacy 3
level style that already exists. Most everything has already been setup
to this point, the patch finishes off the enabling by setting the
appropriate function pointers.
In LRC mode, contexts need to know the PDPs when they are populated. With
dynamic page table allocations, these PDPs may not exist yet. Check if
PDPs have been allocated and use the scratch page if they do not exist yet.
Before submission, update the PDPs in the logic ring context as PDPs
have been allocated.
v2: Update aliasing/true ppgtt allocate/teardown/clear functions for
gen 6 & 7.
v3: Rebase.
v4: Remove BUG() from ppgtt_unbind_vma, but keep checking that either
teardown_va_range or clear_range functions exist (Daniel).
v5: Similar to gen6, in init, gen8_ppgtt_clear_range call is only needed
for aliasing ppgtt. Zombie tracking was originally added for teardown
function and is no longer required.
v6: Update err_out case in gen8_alloc_va_range (missed from latest
rebase).
v7: Rebase after s/page_tables/page_table/.
v8: Updated scratch_pt check after scratch flag was removed in previous
patch.
v9: Note that lrc mode needs to be updated to support init state without
any PDP.
v10: Unmap correct page_table in gen8_alloc_va_range's error case, clean-up
gen8_aliasing_ppgtt_init (remove duplicated map), and initialize PTs
during page table allocation.
v11: Squashed LRC enabling commit, otherwise LRC mode would be left broken
until it was updated to handle the init case without any PDP.
v12: Do not overallocate new_pts bitmap, make alloc_gen8_temp_bitmaps
static and don't abuse of inline functions. (Mika)
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Michel Thierry <michel.thierry@intel.com> (v2+)
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2015-04-08 19:13:34 +08:00
|
|
|
/**
|
|
|
|
* gen8_ppgtt_alloc_page_directories() - Allocate page directories for VA range.
|
|
|
|
* @ppgtt: Master ppgtt structure.
|
|
|
|
* @pdp: Page directory pointer for this address range.
|
|
|
|
* @start: Starting virtual address to begin allocations.
|
|
|
|
* @length: Size of the allocations.
|
|
|
|
* @new_pds: Bitmap set by function with new allocations. Likely used by the
|
|
|
|
* caller to free on error.
|
|
|
|
*
|
|
|
|
* Allocate the required number of page directories starting at the pdpe index of
|
|
|
|
* @start, and ending at the pdpe index of @start + @length. This function will skip
|
|
|
|
* over already allocated page directories within the range, and only allocate
|
|
|
|
* new ones, setting the appropriate pointer within the pdp as well as the
|
|
|
|
* correct position in the bitmap @new_pds.
|
|
|
|
*
|
|
|
|
* The function will only allocate the pages within the range for a given page
|
|
|
|
* directory pointer. In other words, if @start + @length straddles a virtually
|
|
|
|
* addressed PDP boundary (512GB for 4k pages), there will be more allocations
|
|
|
|
* required by the caller. This is not currently possible, and the BUG in the
|
|
|
|
* code will prevent it.
|
|
|
|
*
|
|
|
|
* Return: 0 if success; negative error code otherwise.
|
|
|
|
*/
|
2015-04-08 19:13:31 +08:00
|
|
|
static int gen8_ppgtt_alloc_page_directories(struct i915_hw_ppgtt *ppgtt,
|
|
|
|
struct i915_page_directory_pointer *pdp,
|
2015-04-08 19:13:27 +08:00
|
|
|
uint64_t start,
|
drm/i915/gen8: Dynamic page table allocations
This finishes off the dynamic page tables allocations, in the legacy 3
level style that already exists. Most everything has already been setup
to this point, the patch finishes off the enabling by setting the
appropriate function pointers.
In LRC mode, contexts need to know the PDPs when they are populated. With
dynamic page table allocations, these PDPs may not exist yet. Check if
PDPs have been allocated and use the scratch page if they do not exist yet.
Before submission, update the PDPs in the logic ring context as PDPs
have been allocated.
v2: Update aliasing/true ppgtt allocate/teardown/clear functions for
gen 6 & 7.
v3: Rebase.
v4: Remove BUG() from ppgtt_unbind_vma, but keep checking that either
teardown_va_range or clear_range functions exist (Daniel).
v5: Similar to gen6, in init, gen8_ppgtt_clear_range call is only needed
for aliasing ppgtt. Zombie tracking was originally added for teardown
function and is no longer required.
v6: Update err_out case in gen8_alloc_va_range (missed from latest
rebase).
v7: Rebase after s/page_tables/page_table/.
v8: Updated scratch_pt check after scratch flag was removed in previous
patch.
v9: Note that lrc mode needs to be updated to support init state without
any PDP.
v10: Unmap correct page_table in gen8_alloc_va_range's error case, clean-up
gen8_aliasing_ppgtt_init (remove duplicated map), and initialize PTs
during page table allocation.
v11: Squashed LRC enabling commit, otherwise LRC mode would be left broken
until it was updated to handle the init case without any PDP.
v12: Do not overallocate new_pts bitmap, make alloc_gen8_temp_bitmaps
static and don't abuse of inline functions. (Mika)
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Michel Thierry <michel.thierry@intel.com> (v2+)
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2015-04-08 19:13:34 +08:00
|
|
|
uint64_t length,
|
|
|
|
unsigned long *new_pds)
|
2014-02-20 14:05:43 +08:00
|
|
|
{
|
2015-04-08 19:13:32 +08:00
|
|
|
struct drm_device *dev = ppgtt->base.dev;
|
drm/i915/gen8: Dynamic page table allocations
This finishes off the dynamic page tables allocations, in the legacy 3
level style that already exists. Most everything has already been setup
to this point, the patch finishes off the enabling by setting the
appropriate function pointers.
In LRC mode, contexts need to know the PDPs when they are populated. With
dynamic page table allocations, these PDPs may not exist yet. Check if
PDPs have been allocated and use the scratch page if they do not exist yet.
Before submission, update the PDPs in the logic ring context as PDPs
have been allocated.
v2: Update aliasing/true ppgtt allocate/teardown/clear functions for
gen 6 & 7.
v3: Rebase.
v4: Remove BUG() from ppgtt_unbind_vma, but keep checking that either
teardown_va_range or clear_range functions exist (Daniel).
v5: Similar to gen6, in init, gen8_ppgtt_clear_range call is only needed
for aliasing ppgtt. Zombie tracking was originally added for teardown
function and is no longer required.
v6: Update err_out case in gen8_alloc_va_range (missed from latest
rebase).
v7: Rebase after s/page_tables/page_table/.
v8: Updated scratch_pt check after scratch flag was removed in previous
patch.
v9: Note that lrc mode needs to be updated to support init state without
any PDP.
v10: Unmap correct page_table in gen8_alloc_va_range's error case, clean-up
gen8_aliasing_ppgtt_init (remove duplicated map), and initialize PTs
during page table allocation.
v11: Squashed LRC enabling commit, otherwise LRC mode would be left broken
until it was updated to handle the init case without any PDP.
v12: Do not overallocate new_pts bitmap, make alloc_gen8_temp_bitmaps
static and don't abuse of inline functions. (Mika)
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Michel Thierry <michel.thierry@intel.com> (v2+)
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2015-04-08 19:13:34 +08:00
|
|
|
struct i915_page_directory *pd;
|
2015-04-08 19:13:27 +08:00
|
|
|
uint64_t temp;
|
|
|
|
uint32_t pdpe;
|
|
|
|
|
drm/i915/gen8: Dynamic page table allocations
This finishes off the dynamic page tables allocations, in the legacy 3
level style that already exists. Most everything has already been setup
to this point, the patch finishes off the enabling by setting the
appropriate function pointers.
In LRC mode, contexts need to know the PDPs when they are populated. With
dynamic page table allocations, these PDPs may not exist yet. Check if
PDPs have been allocated and use the scratch page if they do not exist yet.
Before submission, update the PDPs in the logic ring context as PDPs
have been allocated.
v2: Update aliasing/true ppgtt allocate/teardown/clear functions for
gen 6 & 7.
v3: Rebase.
v4: Remove BUG() from ppgtt_unbind_vma, but keep checking that either
teardown_va_range or clear_range functions exist (Daniel).
v5: Similar to gen6, in init, gen8_ppgtt_clear_range call is only needed
for aliasing ppgtt. Zombie tracking was originally added for teardown
function and is no longer required.
v6: Update err_out case in gen8_alloc_va_range (missed from latest
rebase).
v7: Rebase after s/page_tables/page_table/.
v8: Updated scratch_pt check after scratch flag was removed in previous
patch.
v9: Note that lrc mode needs to be updated to support init state without
any PDP.
v10: Unmap correct page_table in gen8_alloc_va_range's error case, clean-up
gen8_aliasing_ppgtt_init (remove duplicated map), and initialize PTs
during page table allocation.
v11: Squashed LRC enabling commit, otherwise LRC mode would be left broken
until it was updated to handle the init case without any PDP.
v12: Do not overallocate new_pts bitmap, make alloc_gen8_temp_bitmaps
static and don't abuse of inline functions. (Mika)
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Michel Thierry <michel.thierry@intel.com> (v2+)
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2015-04-08 19:13:34 +08:00
|
|
|
WARN_ON(!bitmap_empty(new_pds, GEN8_LEGACY_PDPES));
|
|
|
|
|
2015-04-30 23:06:51 +08:00
|
|
|
/* FIXME: upper bound must not overflow 32 bits */
|
2015-05-12 15:35:08 +08:00
|
|
|
WARN_ON((start + length) > (1ULL << 32));
|
2015-04-08 19:13:27 +08:00
|
|
|
|
drm/i915/gen8: Dynamic page table allocations
This finishes off the dynamic page tables allocations, in the legacy 3
level style that already exists. Most everything has already been setup
to this point, the patch finishes off the enabling by setting the
appropriate function pointers.
In LRC mode, contexts need to know the PDPs when they are populated. With
dynamic page table allocations, these PDPs may not exist yet. Check if
PDPs have been allocated and use the scratch page if they do not exist yet.
Before submission, update the PDPs in the logic ring context as PDPs
have been allocated.
v2: Update aliasing/true ppgtt allocate/teardown/clear functions for
gen 6 & 7.
v3: Rebase.
v4: Remove BUG() from ppgtt_unbind_vma, but keep checking that either
teardown_va_range or clear_range functions exist (Daniel).
v5: Similar to gen6, in init, gen8_ppgtt_clear_range call is only needed
for aliasing ppgtt. Zombie tracking was originally added for teardown
function and is no longer required.
v6: Update err_out case in gen8_alloc_va_range (missed from latest
rebase).
v7: Rebase after s/page_tables/page_table/.
v8: Updated scratch_pt check after scratch flag was removed in previous
patch.
v9: Note that lrc mode needs to be updated to support init state without
any PDP.
v10: Unmap correct page_table in gen8_alloc_va_range's error case, clean-up
gen8_aliasing_ppgtt_init (remove duplicated map), and initialize PTs
during page table allocation.
v11: Squashed LRC enabling commit, otherwise LRC mode would be left broken
until it was updated to handle the init case without any PDP.
v12: Do not overallocate new_pts bitmap, make alloc_gen8_temp_bitmaps
static and don't abuse of inline functions. (Mika)
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Michel Thierry <michel.thierry@intel.com> (v2+)
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2015-04-08 19:13:34 +08:00
|
|
|
gen8_for_each_pdpe(pd, pdp, start, length, temp, pdpe) {
|
|
|
|
if (pd)
|
|
|
|
continue;
|
2015-04-08 19:13:33 +08:00
|
|
|
|
drm/i915/gen8: Dynamic page table allocations
This finishes off the dynamic page tables allocations, in the legacy 3
level style that already exists. Most everything has already been setup
to this point, the patch finishes off the enabling by setting the
appropriate function pointers.
In LRC mode, contexts need to know the PDPs when they are populated. With
dynamic page table allocations, these PDPs may not exist yet. Check if
PDPs have been allocated and use the scratch page if they do not exist yet.
Before submission, update the PDPs in the logic ring context as PDPs
have been allocated.
v2: Update aliasing/true ppgtt allocate/teardown/clear functions for
gen 6 & 7.
v3: Rebase.
v4: Remove BUG() from ppgtt_unbind_vma, but keep checking that either
teardown_va_range or clear_range functions exist (Daniel).
v5: Similar to gen6, in init, gen8_ppgtt_clear_range call is only needed
for aliasing ppgtt. Zombie tracking was originally added for teardown
function and is no longer required.
v6: Update err_out case in gen8_alloc_va_range (missed from latest
rebase).
v7: Rebase after s/page_tables/page_table/.
v8: Updated scratch_pt check after scratch flag was removed in previous
patch.
v9: Note that lrc mode needs to be updated to support init state without
any PDP.
v10: Unmap correct page_table in gen8_alloc_va_range's error case, clean-up
gen8_aliasing_ppgtt_init (remove duplicated map), and initialize PTs
during page table allocation.
v11: Squashed LRC enabling commit, otherwise LRC mode would be left broken
until it was updated to handle the init case without any PDP.
v12: Do not overallocate new_pts bitmap, make alloc_gen8_temp_bitmaps
static and don't abuse of inline functions. (Mika)
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Michel Thierry <michel.thierry@intel.com> (v2+)
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2015-04-08 19:13:34 +08:00
|
|
|
pd = alloc_pd_single(dev);
|
|
|
|
if (IS_ERR(pd))
|
2015-02-25 00:22:34 +08:00
|
|
|
goto unwind_out;
|
2015-04-08 19:13:27 +08:00
|
|
|
|
drm/i915/gen8: Dynamic page table allocations
This finishes off the dynamic page tables allocations, in the legacy 3
level style that already exists. Most everything has already been setup
to this point, the patch finishes off the enabling by setting the
appropriate function pointers.
In LRC mode, contexts need to know the PDPs when they are populated. With
dynamic page table allocations, these PDPs may not exist yet. Check if
PDPs have been allocated and use the scratch page if they do not exist yet.
Before submission, update the PDPs in the logic ring context as PDPs
have been allocated.
v2: Update aliasing/true ppgtt allocate/teardown/clear functions for
gen 6 & 7.
v3: Rebase.
v4: Remove BUG() from ppgtt_unbind_vma, but keep checking that either
teardown_va_range or clear_range functions exist (Daniel).
v5: Similar to gen6, in init, gen8_ppgtt_clear_range call is only needed
for aliasing ppgtt. Zombie tracking was originally added for teardown
function and is no longer required.
v6: Update err_out case in gen8_alloc_va_range (missed from latest
rebase).
v7: Rebase after s/page_tables/page_table/.
v8: Updated scratch_pt check after scratch flag was removed in previous
patch.
v9: Note that lrc mode needs to be updated to support init state without
any PDP.
v10: Unmap correct page_table in gen8_alloc_va_range's error case, clean-up
gen8_aliasing_ppgtt_init (remove duplicated map), and initialize PTs
during page table allocation.
v11: Squashed LRC enabling commit, otherwise LRC mode would be left broken
until it was updated to handle the init case without any PDP.
v12: Do not overallocate new_pts bitmap, make alloc_gen8_temp_bitmaps
static and don't abuse of inline functions. (Mika)
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Michel Thierry <michel.thierry@intel.com> (v2+)
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2015-04-08 19:13:34 +08:00
|
|
|
gen8_initialize_pd(&ppgtt->base, pd);
|
|
|
|
pdp->page_directory[pdpe] = pd;
|
|
|
|
set_bit(pdpe, new_pds);
|
2015-02-25 00:22:34 +08:00
|
|
|
}
|
|
|
|
|
2014-02-20 14:05:43 +08:00
|
|
|
return 0;
|
2015-02-25 00:22:34 +08:00
|
|
|
|
|
|
|
unwind_out:
|
drm/i915/gen8: Dynamic page table allocations
This finishes off the dynamic page tables allocations, in the legacy 3
level style that already exists. Most everything has already been setup
to this point, the patch finishes off the enabling by setting the
appropriate function pointers.
In LRC mode, contexts need to know the PDPs when they are populated. With
dynamic page table allocations, these PDPs may not exist yet. Check if
PDPs have been allocated and use the scratch page if they do not exist yet.
Before submission, update the PDPs in the logic ring context as PDPs
have been allocated.
v2: Update aliasing/true ppgtt allocate/teardown/clear functions for
gen 6 & 7.
v3: Rebase.
v4: Remove BUG() from ppgtt_unbind_vma, but keep checking that either
teardown_va_range or clear_range functions exist (Daniel).
v5: Similar to gen6, in init, gen8_ppgtt_clear_range call is only needed
for aliasing ppgtt. Zombie tracking was originally added for teardown
function and is no longer required.
v6: Update err_out case in gen8_alloc_va_range (missed from lastest
rebase).
v7: Rebase after s/page_tables/page_table/.
v8: Updated scratch_pt check after scratch flag was removed in previous
patch.
v9: Note that lrc mode needs to be updated to support init state without
any PDP.
v10: Unmap correct page_table in gen8_alloc_va_range's error case, clean-up
gen8_aliasing_ppgtt_init (remove duplicated map), and initialize PTs
during page table allocation.
v11: Squashed LRC enabling commit, otherwise LRC mode would be left broken
until it was updated to handle the init case without any PDP.
v12: Do not overallocate new_pts bitmap, make alloc_gen8_temp_bitmaps
static and don't abuse of inline functions. (Mika)
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Michel Thierry <michel.thierry@intel.com> (v2+)
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2015-04-08 19:13:34 +08:00
|
|
|
for_each_set_bit(pdpe, new_pds, GEN8_LEGACY_PDPES)
|
2015-04-08 19:13:32 +08:00
|
|
|
unmap_and_free_pd(pdp->page_directory[pdpe], dev);
|
2015-02-25 00:22:34 +08:00
|
|
|
|
|
|
|
return -ENOMEM;
|
2014-02-20 14:05:43 +08:00
|
|
|
}
|
|
|
|
|
drm/i915/gen8: Dynamic page table allocations
This finishes off the dynamic page tables allocations, in the legacy 3
level style that already exists. Most everything has already been setup
to this point, the patch finishes off the enabling by setting the
appropriate function pointers.
In LRC mode, contexts need to know the PDPs when they are populated. With
dynamic page table allocations, these PDPs may not exist yet. Check if
PDPs have been allocated and use the scratch page if they do not exist yet.
Before submission, update the PDPs in the logic ring context as PDPs
have been allocated.
v2: Update aliasing/true ppgtt allocate/teardown/clear functions for
gen 6 & 7.
v3: Rebase.
v4: Remove BUG() from ppgtt_unbind_vma, but keep checking that either
teardown_va_range or clear_range functions exist (Daniel).
v5: Similar to gen6, in init, gen8_ppgtt_clear_range call is only needed
for aliasing ppgtt. Zombie tracking was originally added for teardown
function and is no longer required.
v6: Update err_out case in gen8_alloc_va_range (missed from lastest
rebase).
v7: Rebase after s/page_tables/page_table/.
v8: Updated scratch_pt check after scratch flag was removed in previous
patch.
v9: Note that lrc mode needs to be updated to support init state without
any PDP.
v10: Unmap correct page_table in gen8_alloc_va_range's error case, clean-up
gen8_aliasing_ppgtt_init (remove duplicated map), and initialize PTs
during page table allocation.
v11: Squashed LRC enabling commit, otherwise LRC mode would be left broken
until it was updated to handle the init case without any PDP.
v12: Do not overallocate new_pts bitmap, make alloc_gen8_temp_bitmaps
static and don't abuse of inline functions. (Mika)
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Michel Thierry <michel.thierry@intel.com> (v2+)
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2015-04-08 19:13:34 +08:00
|
|
|
static void
|
|
|
|
free_gen8_temp_bitmaps(unsigned long *new_pds, unsigned long **new_pts)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
|
|
|
|
for (i = 0; i < GEN8_LEGACY_PDPES; i++)
|
|
|
|
kfree(new_pts[i]);
|
|
|
|
kfree(new_pts);
|
|
|
|
kfree(new_pds);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Fills in the page directory bitmap, and the array of page tables bitmap. Both
|
|
|
|
* of these are based on the number of PDPEs in the system.
|
|
|
|
*/
|
|
|
|
static
|
|
|
|
int __must_check alloc_gen8_temp_bitmaps(unsigned long **new_pds,
|
|
|
|
unsigned long ***new_pts)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
unsigned long *pds;
|
|
|
|
unsigned long **pts;
|
|
|
|
|
|
|
|
pds = kcalloc(BITS_TO_LONGS(GEN8_LEGACY_PDPES), sizeof(unsigned long), GFP_KERNEL);
|
|
|
|
if (!pds)
|
|
|
|
return -ENOMEM;
|
|
|
|
|
|
|
|
pts = kcalloc(GEN8_LEGACY_PDPES, sizeof(unsigned long *), GFP_KERNEL);
|
|
|
|
if (!pts) {
|
|
|
|
kfree(pds);
|
|
|
|
return -ENOMEM;
|
|
|
|
}
|
|
|
|
|
|
|
|
for (i = 0; i < GEN8_LEGACY_PDPES; i++) {
|
|
|
|
pts[i] = kcalloc(BITS_TO_LONGS(I915_PDES),
|
|
|
|
sizeof(unsigned long), GFP_KERNEL);
|
|
|
|
if (!pts[i])
|
|
|
|
goto err_out;
|
|
|
|
}
|
|
|
|
|
|
|
|
*new_pds = pds;
|
|
|
|
*new_pts = pts;
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
err_out:
|
|
|
|
free_gen8_temp_bitmaps(pds, pts);
|
|
|
|
return -ENOMEM;
|
|
|
|
}
|
|
|
|
|
2015-04-08 19:13:32 +08:00
|
|
|
static int gen8_alloc_va_range(struct i915_address_space *vm,
|
|
|
|
uint64_t start,
|
|
|
|
uint64_t length)
|
2014-02-20 14:05:43 +08:00
|
|
|
{
|
2015-04-08 19:13:32 +08:00
|
|
|
struct i915_hw_ppgtt *ppgtt =
|
|
|
|
container_of(vm, struct i915_hw_ppgtt, base);
|
drm/i915/gen8: Dynamic page table allocations
This finishes off the dynamic page tables allocations, in the legacy 3
level style that already exists. Most everything has already been setup
to this point, the patch finishes off the enabling by setting the
appropriate function pointers.
In LRC mode, contexts need to know the PDPs when they are populated. With
dynamic page table allocations, these PDPs may not exist yet. Check if
PDPs have been allocated and use the scratch page if they do not exist yet.
Before submission, update the PDPs in the logic ring context as PDPs
have been allocated.
v2: Update aliasing/true ppgtt allocate/teardown/clear functions for
gen 6 & 7.
v3: Rebase.
v4: Remove BUG() from ppgtt_unbind_vma, but keep checking that either
teardown_va_range or clear_range functions exist (Daniel).
v5: Similar to gen6, in init, gen8_ppgtt_clear_range call is only needed
for aliasing ppgtt. Zombie tracking was originally added for teardown
function and is no longer required.
v6: Update err_out case in gen8_alloc_va_range (missed from lastest
rebase).
v7: Rebase after s/page_tables/page_table/.
v8: Updated scratch_pt check after scratch flag was removed in previous
patch.
v9: Note that lrc mode needs to be updated to support init state without
any PDP.
v10: Unmap correct page_table in gen8_alloc_va_range's error case, clean-up
gen8_aliasing_ppgtt_init (remove duplicated map), and initialize PTs
during page table allocation.
v11: Squashed LRC enabling commit, otherwise LRC mode would be left broken
until it was updated to handle the init case without any PDP.
v12: Do not overallocate new_pts bitmap, make alloc_gen8_temp_bitmaps
static and don't abuse of inline functions. (Mika)
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Michel Thierry <michel.thierry@intel.com> (v2+)
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2015-04-08 19:13:34 +08:00
|
|
|
unsigned long *new_page_dirs, **new_page_tables;
|
2015-04-08 19:13:28 +08:00
|
|
|
struct i915_page_directory *pd;
|
2015-04-08 19:13:33 +08:00
|
|
|
const uint64_t orig_start = start;
|
|
|
|
const uint64_t orig_length = length;
|
2015-04-08 19:13:28 +08:00
|
|
|
uint64_t temp;
|
|
|
|
uint32_t pdpe;
|
2014-02-20 14:05:43 +08:00
|
|
|
int ret;
|
|
|
|
|
drm/i915/gen8: Dynamic page table allocations
This finishes off the dynamic page tables allocations, in the legacy 3
level style that already exists. Most everything has already been setup
to this point, the patch finishes off the enabling by setting the
appropriate function pointers.
In LRC mode, contexts need to know the PDPs when they are populated. With
dynamic page table allocations, these PDPs may not exist yet. Check if
PDPs have been allocated and use the scratch page if they do not exist yet.
Before submission, update the PDPs in the logic ring context as PDPs
have been allocated.
v2: Update aliasing/true ppgtt allocate/teardown/clear functions for
gen 6 & 7.
v3: Rebase.
v4: Remove BUG() from ppgtt_unbind_vma, but keep checking that either
teardown_va_range or clear_range functions exist (Daniel).
v5: Similar to gen6, in init, gen8_ppgtt_clear_range call is only needed
for aliasing ppgtt. Zombie tracking was originally added for teardown
function and is no longer required.
v6: Update err_out case in gen8_alloc_va_range (missed from lastest
rebase).
v7: Rebase after s/page_tables/page_table/.
v8: Updated scratch_pt check after scratch flag was removed in previous
patch.
v9: Note that lrc mode needs to be updated to support init state without
any PDP.
v10: Unmap correct page_table in gen8_alloc_va_range's error case, clean-up
gen8_aliasing_ppgtt_init (remove duplicated map), and initialize PTs
during page table allocation.
v11: Squashed LRC enabling commit, otherwise LRC mode would be left broken
until it was updated to handle the init case without any PDP.
v12: Do not overallocate new_pts bitmap, make alloc_gen8_temp_bitmaps
static and don't abuse of inline functions. (Mika)
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Michel Thierry <michel.thierry@intel.com> (v2+)
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2015-04-08 19:13:34 +08:00
|
|
|
/* Wrap is never okay since we can only represent 48b, and we don't
|
|
|
|
* actually use the other side of the canonical address space.
|
|
|
|
*/
|
|
|
|
if (WARN_ON(start + length < start))
|
|
|
|
return -ERANGE;
|
|
|
|
|
|
|
|
ret = alloc_gen8_temp_bitmaps(&new_page_dirs, &new_page_tables);
|
2014-02-20 14:05:43 +08:00
|
|
|
if (ret)
|
|
|
|
return ret;
|
|
|
|
|
drm/i915/gen8: Dynamic page table allocations
This finishes off the dynamic page tables allocations, in the legacy 3
level style that already exists. Most everything has already been setup
to this point, the patch finishes off the enabling by setting the
appropriate function pointers.
In LRC mode, contexts need to know the PDPs when they are populated. With
dynamic page table allocations, these PDPs may not exist yet. Check if
PDPs have been allocated and use the scratch page if they do not exist yet.
Before submission, update the PDPs in the logic ring context as PDPs
have been allocated.
v2: Update aliasing/true ppgtt allocate/teardown/clear functions for
gen 6 & 7.
v3: Rebase.
v4: Remove BUG() from ppgtt_unbind_vma, but keep checking that either
teardown_va_range or clear_range functions exist (Daniel).
v5: Similar to gen6, in init, gen8_ppgtt_clear_range call is only needed
for aliasing ppgtt. Zombie tracking was originally added for teardown
function and is no longer required.
v6: Update err_out case in gen8_alloc_va_range (missed from lastest
rebase).
v7: Rebase after s/page_tables/page_table/.
v8: Updated scratch_pt check after scratch flag was removed in previous
patch.
v9: Note that lrc mode needs to be updated to support init state without
any PDP.
v10: Unmap correct page_table in gen8_alloc_va_range's error case, clean-up
gen8_aliasing_ppgtt_init (remove duplicated map), and initialize PTs
during page table allocation.
v11: Squashed LRC enabling commit, otherwise LRC mode would be left broken
until it was updated to handle the init case without any PDP.
v12: Do not overallocate new_pts bitmap, make alloc_gen8_temp_bitmaps
static and don't abuse of inline functions. (Mika)
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Michel Thierry <michel.thierry@intel.com> (v2+)
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2015-04-08 19:13:34 +08:00
|
|
|
/* Do the allocations first so we can easily bail out */
|
|
|
|
ret = gen8_ppgtt_alloc_page_directories(ppgtt, &ppgtt->pdp, start, length,
|
|
|
|
new_page_dirs);
|
|
|
|
if (ret) {
|
|
|
|
free_gen8_temp_bitmaps(new_page_dirs, new_page_tables);
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* For every page directory referenced, allocate page tables */
|
2015-04-08 19:13:28 +08:00
|
|
|
gen8_for_each_pdpe(pd, &ppgtt->pdp, start, length, temp, pdpe) {
|
drm/i915/gen8: Dynamic page table allocations
This finishes off the dynamic page tables allocations, in the legacy 3
level style that already exists. Most everything has already been setup
to this point, the patch finishes off the enabling by setting the
appropriate function pointers.
In LRC mode, contexts need to know the PDPs when they are populated. With
dynamic page table allocations, these PDPs may not exist yet. Check if
PDPs have been allocated and use the scratch page if they do not exist yet.
Before submission, update the PDPs in the logic ring context as PDPs
have been allocated.
v2: Update aliasing/true ppgtt allocate/teardown/clear functions for
gen 6 & 7.
v3: Rebase.
v4: Remove BUG() from ppgtt_unbind_vma, but keep checking that either
teardown_va_range or clear_range functions exist (Daniel).
v5: Similar to gen6, in init, gen8_ppgtt_clear_range call is only needed
for aliasing ppgtt. Zombie tracking was originally added for teardown
function and is no longer required.
v6: Update err_out case in gen8_alloc_va_range (missed from lastest
rebase).
v7: Rebase after s/page_tables/page_table/.
v8: Updated scratch_pt check after scratch flag was removed in previous
patch.
v9: Note that lrc mode needs to be updated to support init state without
any PDP.
v10: Unmap correct page_table in gen8_alloc_va_range's error case, clean-up
gen8_aliasing_ppgtt_init (remove duplicated map), and initialize PTs
during page table allocation.
v11: Squashed LRC enabling commit, otherwise LRC mode would be left broken
until it was updated to handle the init case without any PDP.
v12: Do not overallocate new_pts bitmap, make alloc_gen8_temp_bitmaps
static and don't abuse of inline functions. (Mika)
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Michel Thierry <michel.thierry@intel.com> (v2+)
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2015-04-08 19:13:34 +08:00
|
|
|
ret = gen8_ppgtt_alloc_pagetabs(ppgtt, pd, start, length,
|
|
|
|
new_page_tables[pdpe]);
|
2015-04-08 19:13:28 +08:00
|
|
|
if (ret)
|
|
|
|
goto err_out;
|
|
|
|
}
|
|
|
|
|
2015-04-08 19:13:33 +08:00
|
|
|
start = orig_start;
|
|
|
|
length = orig_length;
|
|
|
|
|
drm/i915/gen8: Dynamic page table allocations
This finishes off the dynamic page tables allocations, in the legacy 3
level style that already exists. Most everything has already been setup
to this point, the patch finishes off the enabling by setting the
appropriate function pointers.
In LRC mode, contexts need to know the PDPs when they are populated. With
dynamic page table allocations, these PDPs may not exist yet. Check if
PDPs have been allocated and use the scratch page if they do not exist yet.
Before submission, update the PDPs in the logic ring context as PDPs
have been allocated.
v2: Update aliasing/true ppgtt allocate/teardown/clear functions for
gen 6 & 7.
v3: Rebase.
v4: Remove BUG() from ppgtt_unbind_vma, but keep checking that either
teardown_va_range or clear_range functions exist (Daniel).
v5: Similar to gen6, in init, gen8_ppgtt_clear_range call is only needed
for aliasing ppgtt. Zombie tracking was originally added for teardown
function and is no longer required.
v6: Update err_out case in gen8_alloc_va_range (missed from lastest
rebase).
v7: Rebase after s/page_tables/page_table/.
v8: Updated scratch_pt check after scratch flag was removed in previous
patch.
v9: Note that lrc mode needs to be updated to support init state without
any PDP.
v10: Unmap correct page_table in gen8_alloc_va_range's error case, clean-up
gen8_aliasing_ppgtt_init (remove duplicated map), and initialize PTs
during page table allocation.
v11: Squashed LRC enabling commit, otherwise LRC mode would be left broken
until it was updated to handle the init case without any PDP.
v12: Do not overallocate new_pts bitmap, make alloc_gen8_temp_bitmaps
static and don't abuse of inline functions. (Mika)
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Michel Thierry <michel.thierry@intel.com> (v2+)
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2015-04-08 19:13:34 +08:00
|
|
|
/* Allocations have completed successfully, so set the bitmaps, and do
|
|
|
|
* the mappings. */
|
2015-04-08 19:13:33 +08:00
|
|
|
gen8_for_each_pdpe(pd, &ppgtt->pdp, start, length, temp, pdpe) {
|
drm/i915/gen8: Dynamic page table allocations
This finishes off the dynamic page tables allocations, in the legacy 3
level style that already exists. Most everything has already been setup
to this point, the patch finishes off the enabling by setting the
appropriate function pointers.
In LRC mode, contexts need to know the PDPs when they are populated. With
dynamic page table allocations, these PDPs may not exist yet. Check if
PDPs have been allocated and use the scratch page if they do not exist yet.
Before submission, update the PDPs in the logic ring context as PDPs
have been allocated.
v2: Update aliasing/true ppgtt allocate/teardown/clear functions for
gen 6 & 7.
v3: Rebase.
v4: Remove BUG() from ppgtt_unbind_vma, but keep checking that either
teardown_va_range or clear_range functions exist (Daniel).
v5: Similar to gen6, in init, gen8_ppgtt_clear_range call is only needed
for aliasing ppgtt. Zombie tracking was originally added for teardown
function and is no longer required.
v6: Update err_out case in gen8_alloc_va_range (missed from lastest
rebase).
v7: Rebase after s/page_tables/page_table/.
v8: Updated scratch_pt check after scratch flag was removed in previous
patch.
v9: Note that lrc mode needs to be updated to support init state without
any PDP.
v10: Unmap correct page_table in gen8_alloc_va_range's error case, clean-up
gen8_aliasing_ppgtt_init (remove duplicated map), and initialize PTs
during page table allocation.
v11: Squashed LRC enabling commit, otherwise LRC mode would be left broken
until it was updated to handle the init case without any PDP.
v12: Do not overallocate new_pts bitmap, make alloc_gen8_temp_bitmaps
static and don't abuse of inline functions. (Mika)
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Michel Thierry <michel.thierry@intel.com> (v2+)
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2015-04-08 19:13:34 +08:00
|
|
|
gen8_pde_t *const page_directory = kmap_atomic(pd->page);
|
2015-04-08 19:13:33 +08:00
|
|
|
struct i915_page_table *pt;
|
|
|
|
uint64_t pd_len = gen8_clamp_pd(start, length);
|
|
|
|
uint64_t pd_start = start;
|
|
|
|
uint32_t pde;
|
|
|
|
|
drm/i915/gen8: Dynamic page table allocations
This finishes off the dynamic page tables allocations, in the legacy 3
level style that already exists. Most everything has already been setup
to this point, the patch finishes off the enabling by setting the
appropriate function pointers.
In LRC mode, contexts need to know the PDPs when they are populated. With
dynamic page table allocations, these PDPs may not exist yet. Check if
PDPs have been allocated and use the scratch page if they do not exist yet.
Before submission, update the PDPs in the logic ring context as PDPs
have been allocated.
v2: Update aliasing/true ppgtt allocate/teardown/clear functions for
gen 6 & 7.
v3: Rebase.
v4: Remove BUG() from ppgtt_unbind_vma, but keep checking that either
teardown_va_range or clear_range functions exist (Daniel).
v5: Similar to gen6, in init, gen8_ppgtt_clear_range call is only needed
for aliasing ppgtt. Zombie tracking was originally added for teardown
function and is no longer required.
v6: Update err_out case in gen8_alloc_va_range (missed from lastest
rebase).
v7: Rebase after s/page_tables/page_table/.
v8: Updated scratch_pt check after scratch flag was removed in previous
patch.
v9: Note that lrc mode needs to be updated to support init state without
any PDP.
v10: Unmap correct page_table in gen8_alloc_va_range's error case, clean-up
gen8_aliasing_ppgtt_init (remove duplicated map), and initialize PTs
during page table allocation.
v11: Squashed LRC enabling commit, otherwise LRC mode would be left broken
until it was updated to handle the init case without any PDP.
v12: Do not overallocate new_pts bitmap, make alloc_gen8_temp_bitmaps
static and don't abuse of inline functions. (Mika)
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Michel Thierry <michel.thierry@intel.com> (v2+)
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2015-04-08 19:13:34 +08:00
|
|
|
/* Every pd should be allocated, we just did that above. */
|
|
|
|
WARN_ON(!pd);
|
|
|
|
|
|
|
|
gen8_for_each_pde(pt, pd, pd_start, pd_len, temp, pde) {
|
|
|
|
/* Same reasoning as pd */
|
|
|
|
WARN_ON(!pt);
|
|
|
|
WARN_ON(!pd_len);
|
|
|
|
WARN_ON(!gen8_pte_count(pd_start, pd_len));
|
|
|
|
|
|
|
|
/* Set our used ptes within the page table */
|
|
|
|
bitmap_set(pt->used_ptes,
|
|
|
|
gen8_pte_index(pd_start),
|
|
|
|
gen8_pte_count(pd_start, pd_len));
|
|
|
|
|
|
|
|
/* Our pde is now pointing to the pagetable, pt */
|
2015-04-08 19:13:33 +08:00
|
|
|
set_bit(pde, pd->used_pdes);
|
drm/i915/gen8: Dynamic page table allocations
This finishes off the dynamic page tables allocations, in the legacy 3
level style that already exists. Most everything has already been setup
to this point, the patch finishes off the enabling by setting the
appropriate function pointers.
In LRC mode, contexts need to know the PDPs when they are populated. With
dynamic page table allocations, these PDPs may not exist yet. Check if
PDPs have been allocated and use the scratch page if they do not exist yet.
Before submission, update the PDPs in the logic ring context as PDPs
have been allocated.
v2: Update aliasing/true ppgtt allocate/teardown/clear functions for
gen 6 & 7.
v3: Rebase.
v4: Remove BUG() from ppgtt_unbind_vma, but keep checking that either
teardown_va_range or clear_range functions exist (Daniel).
v5: Similar to gen6, in init, gen8_ppgtt_clear_range call is only needed
for aliasing ppgtt. Zombie tracking was originally added for teardown
function and is no longer required.
v6: Update err_out case in gen8_alloc_va_range (missed from lastest
rebase).
v7: Rebase after s/page_tables/page_table/.
v8: Updated scratch_pt check after scratch flag was removed in previous
patch.
v9: Note that lrc mode needs to be updated to support init state without
any PDP.
v10: Unmap correct page_table in gen8_alloc_va_range's error case, clean-up
gen8_aliasing_ppgtt_init (remove duplicated map), and initialize PTs
during page table allocation.
v11: Squashed LRC enabling commit, otherwise LRC mode would be left broken
until it was updated to handle the init case without any PDP.
v12: Do not overallocate new_pts bitmap, make alloc_gen8_temp_bitmaps
static and don't abuse of inline functions. (Mika)
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Michel Thierry <michel.thierry@intel.com> (v2+)
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2015-04-08 19:13:34 +08:00
|
|
|
|
|
|
|
/* Map the PDE to the page table */
|
|
|
|
__gen8_do_map_pt(page_directory + pde, pt, vm->dev);
|
|
|
|
|
|
|
|
/* NB: We haven't yet mapped ptes to pages. At this
|
|
|
|
* point we're still relying on insert_entries() */
|
2015-04-08 19:13:33 +08:00
|
|
|
}
|
drm/i915/gen8: Dynamic page table allocations
This finishes off the dynamic page tables allocations, in the legacy 3
level style that already exists. Most everything has already been setup
to this point, the patch finishes off the enabling by setting the
appropriate function pointers.
In LRC mode, contexts need to know the PDPs when they are populated. With
dynamic page table allocations, these PDPs may not exist yet. Check if
PDPs have been allocated and use the scratch page if they do not exist yet.
Before submission, update the PDPs in the logic ring context as PDPs
have been allocated.
v2: Update aliasing/true ppgtt allocate/teardown/clear functions for
gen 6 & 7.
v3: Rebase.
v4: Remove BUG() from ppgtt_unbind_vma, but keep checking that either
teardown_va_range or clear_range functions exist (Daniel).
v5: Similar to gen6, in init, gen8_ppgtt_clear_range call is only needed
for aliasing ppgtt. Zombie tracking was originally added for teardown
function and is no longer required.
v6: Update err_out case in gen8_alloc_va_range (missed from lastest
rebase).
v7: Rebase after s/page_tables/page_table/.
v8: Updated scratch_pt check after scratch flag was removed in previous
patch.
v9: Note that lrc mode needs to be updated to support init state without
any PDP.
v10: Unmap correct page_table in gen8_alloc_va_range's error case, clean-up
gen8_aliasing_ppgtt_init (remove duplicated map), and initialize PTs
during page table allocation.
v11: Squashed LRC enabling commit, otherwise LRC mode would be left broken
until it was updated to handle the init case without any PDP.
v12: Do not overallocate new_pts bitmap, make alloc_gen8_temp_bitmaps
static and don't abuse of inline functions. (Mika)
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Michel Thierry <michel.thierry@intel.com> (v2+)
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2015-04-08 19:13:34 +08:00
|
|
|
|
|
|
|
if (!HAS_LLC(vm->dev))
|
|
|
|
drm_clflush_virt_range(page_directory, PAGE_SIZE);
|
|
|
|
|
|
|
|
kunmap_atomic(page_directory);
|
|
|
|
|
2015-04-08 19:13:33 +08:00
|
|
|
set_bit(pdpe, ppgtt->pdp.used_pdpes);
|
|
|
|
}
|
|
|
|
|
drm/i915/gen8: Dynamic page table allocations
This finishes off the dynamic page tables allocations, in the legacy 3
level style that already exists. Most everything has already been setup
to this point, the patch finishes off the enabling by setting the
appropriate function pointers.
In LRC mode, contexts need to know the PDPs when they are populated. With
dynamic page table allocations, these PDPs may not exist yet. Check if
PDPs have been allocated and use the scratch page if they do not exist yet.
Before submission, update the PDPs in the logic ring context as PDPs
have been allocated.
v2: Update aliasing/true ppgtt allocate/teardown/clear functions for
gen 6 & 7.
v3: Rebase.
v4: Remove BUG() from ppgtt_unbind_vma, but keep checking that either
teardown_va_range or clear_range functions exist (Daniel).
v5: Similar to gen6, in init, gen8_ppgtt_clear_range call is only needed
for aliasing ppgtt. Zombie tracking was originally added for teardown
function and is no longer required.
v6: Update err_out case in gen8_alloc_va_range (missed from lastest
rebase).
v7: Rebase after s/page_tables/page_table/.
v8: Updated scratch_pt check after scratch flag was removed in previous
patch.
v9: Note that lrc mode needs to be updated to support init state without
any PDP.
v10: Unmap correct page_table in gen8_alloc_va_range's error case, clean-up
gen8_aliasing_ppgtt_init (remove duplicated map), and initialize PTs
during page table allocation.
v11: Squashed LRC enabling commit, otherwise LRC mode would be left broken
until it was updated to handle the init case without any PDP.
v12: Do not overallocate new_pts bitmap, make alloc_gen8_temp_bitmaps
static and don't abuse of inline functions. (Mika)
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Michel Thierry <michel.thierry@intel.com> (v2+)
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2015-04-08 19:13:34 +08:00
|
|
|
free_gen8_temp_bitmaps(new_page_dirs, new_page_tables);
|
2015-02-25 00:22:34 +08:00
|
|
|
return 0;
|
2014-02-20 14:05:43 +08:00
|
|
|
|
2015-02-25 00:22:34 +08:00
|
|
|
err_out:
|
drm/i915/gen8: Dynamic page table allocations
This finishes off the dynamic page tables allocations, in the legacy 3
level style that already exists. Most everything has already been setup
to this point, the patch finishes off the enabling by setting the
appropriate function pointers.
In LRC mode, contexts need to know the PDPs when they are populated. With
dynamic page table allocations, these PDPs may not exist yet. Check if
PDPs have been allocated and use the scratch page if they do not exist yet.
Before submission, update the PDPs in the logic ring context as PDPs
have been allocated.
v2: Update aliasing/true ppgtt allocate/teardown/clear functions for
gen 6 & 7.
v3: Rebase.
v4: Remove BUG() from ppgtt_unbind_vma, but keep checking that either
teardown_va_range or clear_range functions exist (Daniel).
v5: Similar to gen6, in init, gen8_ppgtt_clear_range call is only needed
for aliasing ppgtt. Zombie tracking was originally added for teardown
function and is no longer required.
v6: Update err_out case in gen8_alloc_va_range (missed from lastest
rebase).
v7: Rebase after s/page_tables/page_table/.
v8: Updated scratch_pt check after scratch flag was removed in previous
patch.
v9: Note that lrc mode needs to be updated to support init state without
any PDP.
v10: Unmap correct page_table in gen8_alloc_va_range's error case, clean-up
gen8_aliasing_ppgtt_init (remove duplicated map), and initialize PTs
during page table allocation.
v11: Squashed LRC enabling commit, otherwise LRC mode would be left broken
until it was updated to handle the init case without any PDP.
v12: Do not overallocate new_pts bitmap, make alloc_gen8_temp_bitmaps
static and don't abuse of inline functions. (Mika)
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Michel Thierry <michel.thierry@intel.com> (v2+)
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2015-04-08 19:13:34 +08:00
|
|
|
while (pdpe--) {
|
|
|
|
for_each_set_bit(temp, new_page_tables[pdpe], I915_PDES)
|
|
|
|
unmap_and_free_pt(ppgtt->pdp.page_directory[pdpe]->page_table[temp], vm->dev);
|
|
|
|
}
|
|
|
|
|
|
|
|
for_each_set_bit(pdpe, new_page_dirs, GEN8_LEGACY_PDPES)
|
|
|
|
unmap_and_free_pd(ppgtt->pdp.page_directory[pdpe], vm->dev);
|
|
|
|
|
|
|
|
free_gen8_temp_bitmaps(new_page_dirs, new_page_tables);
|
2014-02-20 14:05:43 +08:00
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2015-03-18 21:47:59 +08:00
|
|
|
/*
|
2014-02-20 14:05:42 +08:00
|
|
|
* GEN8 legacy ppgtt programming is accomplished through a max 4 PDP registers
|
|
|
|
* with a net effect resembling a 2-level page table in normal x86 terms. Each
|
|
|
|
* PDP represents 1GB of memory 4 * 512 * 512 * 4096 = 4GB legacy 32b address
|
|
|
|
* space.
|
2013-11-05 12:47:32 +08:00
|
|
|
*
|
2014-02-20 14:05:42 +08:00
|
|
|
*/
|
2015-04-14 23:35:14 +08:00
|
|
|
static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
|
2013-11-05 12:47:32 +08:00
|
|
|
{
|
2015-04-08 19:13:27 +08:00
|
|
|
ppgtt->scratch_pt = alloc_pt_single(ppgtt->base.dev);
|
|
|
|
if (IS_ERR(ppgtt->scratch_pt))
|
|
|
|
return PTR_ERR(ppgtt->scratch_pt);
|
|
|
|
|
2015-04-08 19:13:32 +08:00
|
|
|
ppgtt->scratch_pd = alloc_pd_single(ppgtt->base.dev);
|
2015-04-08 19:13:29 +08:00
|
|
|
if (IS_ERR(ppgtt->scratch_pd))
|
|
|
|
return PTR_ERR(ppgtt->scratch_pd);
|
|
|
|
|
2015-04-08 19:13:27 +08:00
|
|
|
gen8_initialize_pt(&ppgtt->base, ppgtt->scratch_pt);
|
2015-04-08 19:13:29 +08:00
|
|
|
gen8_initialize_pd(&ppgtt->base, ppgtt->scratch_pd);
|
2015-04-08 19:13:27 +08:00
|
|
|
|
drm/i915/gen8: Dynamic page table allocations
This finishes off the dynamic page tables allocations, in the legacy 3
level style that already exists. Most everything has already been setup
to this point, the patch finishes off the enabling by setting the
appropriate function pointers.
In LRC mode, contexts need to know the PDPs when they are populated. With
dynamic page table allocations, these PDPs may not exist yet. Check if
PDPs have been allocated and use the scratch page if they do not exist yet.
Before submission, update the PDPs in the logic ring context as PDPs
have been allocated.
v2: Update aliasing/true ppgtt allocate/teardown/clear functions for
gen 6 & 7.
v3: Rebase.
v4: Remove BUG() from ppgtt_unbind_vma, but keep checking that either
teardown_va_range or clear_range functions exist (Daniel).
v5: Similar to gen6, in init, gen8_ppgtt_clear_range call is only needed
for aliasing ppgtt. Zombie tracking was originally added for teardown
function and is no longer required.
v6: Update err_out case in gen8_alloc_va_range (missed from lastest
rebase).
v7: Rebase after s/page_tables/page_table/.
v8: Updated scratch_pt check after scratch flag was removed in previous
patch.
v9: Note that lrc mode needs to be updated to support init state without
any PDP.
v10: Unmap correct page_table in gen8_alloc_va_range's error case, clean-up
gen8_aliasing_ppgtt_init (remove duplicated map), and initialize PTs
during page table allocation.
v11: Squashed LRC enabling commit, otherwise LRC mode would be left broken
until it was updated to handle the init case without any PDP.
v12: Do not overallocate new_pts bitmap, make alloc_gen8_temp_bitmaps
static and don't abuse of inline functions. (Mika)
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Michel Thierry <michel.thierry@intel.com> (v2+)
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2015-04-08 19:13:34 +08:00
|
|
|
ppgtt->base.start = 0;
|
2015-04-14 23:35:14 +08:00
|
|
|
ppgtt->base.total = 1ULL << 32;
|
drm/i915/gen8: Dynamic page table allocations
This finishes off the dynamic page tables allocations, in the legacy 3
level style that already exists. Most everything has already been setup
to this point, the patch finishes off the enabling by setting the
appropriate function pointers.
In LRC mode, contexts need to know the PDPs when they are populated. With
dynamic page table allocations, these PDPs may not exist yet. Check if
PDPs have been allocated and use the scratch page if they do not exist yet.
Before submission, update the PDPs in the logic ring context as PDPs
have been allocated.
v2: Update aliasing/true ppgtt allocate/teardown/clear functions for
gen 6 & 7.
v3: Rebase.
v4: Remove BUG() from ppgtt_unbind_vma, but keep checking that either
teardown_va_range or clear_range functions exist (Daniel).
v5: Similar to gen6, in init, gen8_ppgtt_clear_range call is only needed
for aliasing ppgtt. Zombie tracking was originally added for teardown
function and is no longer required.
v6: Update err_out case in gen8_alloc_va_range (missed from lastest
rebase).
v7: Rebase after s/page_tables/page_table/.
v8: Updated scratch_pt check after scratch flag was removed in previous
patch.
v9: Note that lrc mode needs to be updated to support init state without
any PDP.
v10: Unmap correct page_table in gen8_alloc_va_range's error case, clean-up
gen8_aliasing_ppgtt_init (remove duplicated map), and initialize PTs
during page table allocation.
v11: Squashed LRC enabling commit, otherwise LRC mode would be left broken
until it was updated to handle the init case without any PDP.
v12: Do not overallocate new_pts bitmap, make alloc_gen8_temp_bitmaps
static and don't abuse of inline functions. (Mika)
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Michel Thierry <michel.thierry@intel.com> (v2+)
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2015-04-08 19:13:34 +08:00
|
|
|
ppgtt->base.cleanup = gen8_ppgtt_cleanup;
|
2015-04-14 23:35:14 +08:00
|
|
|
ppgtt->base.allocate_va_range = gen8_alloc_va_range;
|
drm/i915/gen8: Dynamic page table allocations
This finishes off the dynamic page tables allocations, in the legacy 3
level style that already exists. Most everything has already been setup
to this point, the patch finishes off the enabling by setting the
appropriate function pointers.
In LRC mode, contexts need to know the PDPs when they are populated. With
dynamic page table allocations, these PDPs may not exist yet. Check if
PDPs have been allocated and use the scratch page if they do not exist yet.
Before submission, update the PDPs in the logic ring context as PDPs
have been allocated.
v2: Update aliasing/true ppgtt allocate/teardown/clear functions for
gen 6 & 7.
v3: Rebase.
v4: Remove BUG() from ppgtt_unbind_vma, but keep checking that either
teardown_va_range or clear_range functions exist (Daniel).
v5: Similar to gen6, in init, gen8_ppgtt_clear_range call is only needed
for aliasing ppgtt. Zombie tracking was originally added for teardown
function and is no longer required.
v6: Update err_out case in gen8_alloc_va_range (missed from lastest
rebase).
v7: Rebase after s/page_tables/page_table/.
v8: Updated scratch_pt check after scratch flag was removed in previous
patch.
v9: Note that lrc mode needs to be updated to support init state without
any PDP.
v10: Unmap correct page_table in gen8_alloc_va_range's error case, clean-up
gen8_aliasing_ppgtt_init (remove duplicated map), and initialize PTs
during page table allocation.
v11: Squashed LRC enabling commit, otherwise LRC mode would be left broken
until it was updated to handle the init case without any PDP.
v12: Do not overallocate new_pts bitmap, make alloc_gen8_temp_bitmaps
static and don't abuse of inline functions. (Mika)
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Michel Thierry <michel.thierry@intel.com> (v2+)
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2015-04-08 19:13:34 +08:00
|
|
|
ppgtt->base.insert_entries = gen8_ppgtt_insert_entries;
|
2015-04-14 23:35:11 +08:00
|
|
|
ppgtt->base.clear_range = gen8_ppgtt_clear_range;
|
2015-04-14 23:35:12 +08:00
|
|
|
ppgtt->base.unbind_vma = ppgtt_unbind_vma;
|
|
|
|
ppgtt->base.bind_vma = ppgtt_bind_vma;
|
drm/i915/gen8: Dynamic page table allocations
This finishes off the dynamic page tables allocations, in the legacy 3
level style that already exists. Most everything has already been setup
to this point, the patch finishes off the enabling by setting the
appropriate function pointers.
In LRC mode, contexts need to know the PDPs when they are populated. With
dynamic page table allocations, these PDPs may not exist yet. Check if
PDPs have been allocated and use the scratch page if they do not exist yet.
Before submission, update the PDPs in the logic ring context as PDPs
have been allocated.
v2: Update aliasing/true ppgtt allocate/teardown/clear functions for
gen 6 & 7.
v3: Rebase.
v4: Remove BUG() from ppgtt_unbind_vma, but keep checking that either
teardown_va_range or clear_range functions exist (Daniel).
v5: Similar to gen6, in init, gen8_ppgtt_clear_range call is only needed
for aliasing ppgtt. Zombie tracking was originally added for teardown
function and is no longer required.
v6: Update err_out case in gen8_alloc_va_range (missed from lastest
rebase).
v7: Rebase after s/page_tables/page_table/.
v8: Updated scratch_pt check after scratch flag was removed in previous
patch.
v9: Note that lrc mode needs to be updated to support init state without
any PDP.
v10: Unmap correct page_table in gen8_alloc_va_range's error case, clean-up
gen8_aliasing_ppgtt_init (remove duplicated map), and initialize PTs
during page table allocation.
v11: Squashed LRC enabling commit, otherwise LRC mode would be left broken
until it was updated to handle the init case without any PDP.
v12: Do not overallocate new_pts bitmap, make alloc_gen8_temp_bitmaps
static and don't abuse of inline functions. (Mika)
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Michel Thierry <michel.thierry@intel.com> (v2+)
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2015-04-08 19:13:34 +08:00
|
|
|
|
|
|
|
ppgtt->switch_mm = gen8_mm_switch;
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2013-12-07 06:11:29 +08:00
|
|
|
static void gen6_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m)
|
|
|
|
{
|
|
|
|
struct i915_address_space *vm = &ppgtt->base;
|
2015-04-08 19:13:30 +08:00
|
|
|
struct i915_page_table *unused;
|
2015-03-17 00:00:54 +08:00
|
|
|
gen6_pte_t scratch_pte;
|
2013-12-07 06:11:29 +08:00
|
|
|
uint32_t pd_entry;
|
2015-04-08 19:13:30 +08:00
|
|
|
uint32_t pte, pde, temp;
|
|
|
|
uint32_t start = ppgtt->base.start, length = ppgtt->base.total;
|
2013-12-07 06:11:29 +08:00
|
|
|
|
2014-06-17 13:29:42 +08:00
|
|
|
scratch_pte = vm->pte_encode(vm->scratch.addr, I915_CACHE_LLC, true, 0);
|
2013-12-07 06:11:29 +08:00
|
|
|
|
2015-04-08 19:13:30 +08:00
|
|
|
gen6_for_each_pde(unused, &ppgtt->pd, start, length, temp, pde) {
|
2013-12-07 06:11:29 +08:00
|
|
|
u32 expected;
|
2015-03-17 00:00:54 +08:00
|
|
|
gen6_pte_t *pt_vaddr;
|
drm/i915: Create page table allocators
As we move toward dynamic page table allocation, it becomes much easier
to manage our data structures if break do things less coarsely by
breaking up all of our actions into individual tasks. This makes the
code easier to write, read, and verify.
Aside from the dissection of the allocation functions, the patch
statically allocates the page table structures without a page directory.
This remains the same for all platforms,
The patch itself should not have much functional difference. The primary
noticeable difference is the fact that page tables are no longer
allocated, but rather statically declared as part of the page directory.
This has non-zero overhead, but things gain additional complexity as a
result.
This patch exists for a few reasons:
1. Splitting out the functions allows easily combining GEN6 and GEN8
code. Page tables have no difference based on GEN8. As we'll see in a
future patch when we add the DMA mappings to the allocations, it
requires only one small change to make work, and error handling should
just fall into place.
2. Unless we always want to allocate all page tables under a given PDE,
we'll have to eventually break this up into an array of pointers (or
pointer to pointer).
3. Having the discrete functions is easier to review, and understand.
All allocations and frees now take place in just a couple of locations.
Reviewing, and catching leaks should be easy.
4. Less important: the GFP flags are confined to one location, which
makes playing around with such things trivial.
v2: Updated commit message to explain why this patch exists
v3: For lrc, s/pdp.page_directory[i].daddr/pdp.page_directory[i]->daddr/
v4: Renamed free_pt/pd_single functions to unmap_and_free_pt/pd (Daniel)
v5: Added additional safety checks in gen8 clear/free/unmap.
v6: Use WARN_ON and return -EINVAL in alloc_pt_range (Mika).
v7: Make err_out loop symmetrical to the way we allocate in
alloc_pt_range. Also s/page_tables/page_table and correct commit
message (Mika)
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Michel Thierry <michel.thierry@intel.com> (v3+)
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2015-02-25 00:22:36 +08:00
|
|
|
dma_addr_t pt_addr = ppgtt->pd.page_table[pde]->daddr;
|
2015-04-08 19:13:30 +08:00
|
|
|
pd_entry = readl(ppgtt->pd_addr + pde);
|
2013-12-07 06:11:29 +08:00
|
|
|
expected = (GEN6_PDE_ADDR_ENCODE(pt_addr) | GEN6_PDE_VALID);
|
|
|
|
|
|
|
|
if (pd_entry != expected)
|
|
|
|
seq_printf(m, "\tPDE #%d mismatch: Actual PDE: %x Expected PDE: %x\n",
|
|
|
|
pde,
|
|
|
|
pd_entry,
|
|
|
|
expected);
|
|
|
|
seq_printf(m, "\tPDE: %x\n", pd_entry);
|
|
|
|
|
drm/i915: Create page table allocators
As we move toward dynamic page table allocation, it becomes much easier
to manage our data structures if break do things less coarsely by
breaking up all of our actions into individual tasks. This makes the
code easier to write, read, and verify.
Aside from the dissection of the allocation functions, the patch
statically allocates the page table structures without a page directory.
This remains the same for all platforms,
The patch itself should not have much functional difference. The primary
noticeable difference is the fact that page tables are no longer
allocated, but rather statically declared as part of the page directory.
This has non-zero overhead, but things gain additional complexity as a
result.
This patch exists for a few reasons:
1. Splitting out the functions allows easily combining GEN6 and GEN8
code. Page tables have no difference based on GEN8. As we'll see in a
future patch when we add the DMA mappings to the allocations, it
requires only one small change to make work, and error handling should
just fall into place.
2. Unless we always want to allocate all page tables under a given PDE,
we'll have to eventually break this up into an array of pointers (or
pointer to pointer).
3. Having the discrete functions is easier to review, and understand.
All allocations and frees now take place in just a couple of locations.
Reviewing, and catching leaks should be easy.
4. Less important: the GFP flags are confined to one location, which
makes playing around with such things trivial.
v2: Updated commit message to explain why this patch exists
v3: For lrc, s/pdp.page_directory[i].daddr/pdp.page_directory[i]->daddr/
v4: Renamed free_pt/pd_single functions to unmap_and_free_pt/pd (Daniel)
v5: Added additional safety checks in gen8 clear/free/unmap.
v6: Use WARN_ON and return -EINVAL in alloc_pt_range (Mika).
v7: Make err_out loop symmetrical to the way we allocate in
alloc_pt_range. Also s/page_tables/page_table and correct commit
message (Mika)
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Michel Thierry <michel.thierry@intel.com> (v3+)
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2015-02-25 00:22:36 +08:00
|
|
|
pt_vaddr = kmap_atomic(ppgtt->pd.page_table[pde]->page);
|
2015-03-17 00:00:54 +08:00
|
|
|
for (pte = 0; pte < GEN6_PTES; pte+=4) {
|
2013-12-07 06:11:29 +08:00
|
|
|
unsigned long va =
|
2015-03-17 00:00:54 +08:00
|
|
|
(pde * PAGE_SIZE * GEN6_PTES) +
|
2013-12-07 06:11:29 +08:00
|
|
|
(pte * PAGE_SIZE);
|
|
|
|
int i;
|
|
|
|
bool found = false;
|
|
|
|
for (i = 0; i < 4; i++)
|
|
|
|
if (pt_vaddr[pte + i] != scratch_pte)
|
|
|
|
found = true;
|
|
|
|
if (!found)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
seq_printf(m, "\t\t0x%lx [%03d,%04d]: =", va, pde, pte);
|
|
|
|
for (i = 0; i < 4; i++) {
|
|
|
|
if (pt_vaddr[pte + i] != scratch_pte)
|
|
|
|
seq_printf(m, " %08x", pt_vaddr[pte + i]);
|
|
|
|
else
|
|
|
|
seq_puts(m, " SCRATCH ");
|
|
|
|
}
|
|
|
|
seq_puts(m, "\n");
|
|
|
|
}
|
|
|
|
kunmap_atomic(pt_vaddr);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-03-17 00:00:56 +08:00
|
|
|
/* Write pde (index) from the page directory @pd to the page table @pt */
|
2015-04-08 19:13:23 +08:00
|
|
|
static void gen6_write_pde(struct i915_page_directory *pd,
|
|
|
|
const int pde, struct i915_page_table *pt)
|
2013-04-09 09:43:54 +08:00
|
|
|
{
|
2015-03-17 00:00:56 +08:00
|
|
|
/* Caller needs to make sure the write completes if necessary */
|
|
|
|
struct i915_hw_ppgtt *ppgtt =
|
|
|
|
container_of(pd, struct i915_hw_ppgtt, pd);
|
|
|
|
u32 pd_entry;
|
2013-04-09 09:43:54 +08:00
|
|
|
|
2015-03-17 00:00:56 +08:00
|
|
|
pd_entry = GEN6_PDE_ADDR_ENCODE(pt->daddr);
|
|
|
|
pd_entry |= GEN6_PDE_VALID;
|
2013-04-09 09:43:54 +08:00
|
|
|
|
2015-03-17 00:00:56 +08:00
|
|
|
writel(pd_entry, ppgtt->pd_addr + pde);
|
|
|
|
}
|
2013-04-09 09:43:54 +08:00
|
|
|
|
2015-03-17 00:00:56 +08:00
|
|
|
/* Write all the page tables found in the ppgtt structure to incrementing page
|
|
|
|
* directories. */
|
|
|
|
static void gen6_write_page_range(struct drm_i915_private *dev_priv,
|
2015-04-08 19:13:23 +08:00
|
|
|
struct i915_page_directory *pd,
|
2015-03-17 00:00:56 +08:00
|
|
|
uint32_t start, uint32_t length)
|
|
|
|
{
|
2015-04-08 19:13:23 +08:00
|
|
|
struct i915_page_table *pt;
|
2015-03-17 00:00:56 +08:00
|
|
|
uint32_t pde, temp;
|
|
|
|
|
|
|
|
gen6_for_each_pde(pt, pd, start, length, temp, pde)
|
|
|
|
gen6_write_pde(pd, pde, pt);
|
|
|
|
|
|
|
|
/* Make sure write is complete before other code can use this page
|
|
|
|
* table. Also require for WC mapped PTEs */
|
|
|
|
readl(dev_priv->gtt.gsm);
|
2013-04-24 14:15:32 +08:00
|
|
|
}
|
|
|
|
|
2013-12-07 06:11:09 +08:00
|
|
|
static uint32_t get_pd_offset(struct i915_hw_ppgtt *ppgtt)
|
2013-04-24 14:15:32 +08:00
|
|
|
{
|
2015-02-25 00:22:35 +08:00
|
|
|
BUG_ON(ppgtt->pd.pd_offset & 0x3f);
|
2013-12-07 06:11:09 +08:00
|
|
|
|
2015-02-25 00:22:35 +08:00
|
|
|
return (ppgtt->pd.pd_offset / 64) << 16;
|
2013-12-07 06:11:09 +08:00
|
|
|
}
|
|
|
|
|
2013-12-07 06:11:12 +08:00
|
|
|
static int hsw_mm_switch(struct i915_hw_ppgtt *ppgtt,
|
2014-08-16 01:51:35 +08:00
|
|
|
struct intel_engine_cs *ring)
|
2013-12-07 06:11:12 +08:00
|
|
|
{
|
|
|
|
int ret;
|
|
|
|
|
|
|
|
/* NB: TLBs must be flushed and invalidated before a switch */
|
|
|
|
ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
|
|
|
|
if (ret)
|
|
|
|
return ret;
|
|
|
|
|
|
|
|
ret = intel_ring_begin(ring, 6);
|
|
|
|
if (ret)
|
|
|
|
return ret;
|
|
|
|
|
|
|
|
intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(2));
|
|
|
|
intel_ring_emit(ring, RING_PP_DIR_DCLV(ring));
|
|
|
|
intel_ring_emit(ring, PP_DIR_DCLV_2G);
|
|
|
|
intel_ring_emit(ring, RING_PP_DIR_BASE(ring));
|
|
|
|
intel_ring_emit(ring, get_pd_offset(ppgtt));
|
|
|
|
intel_ring_emit(ring, MI_NOOP);
|
|
|
|
intel_ring_advance(ring);
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2015-02-10 19:05:54 +08:00
|
|
|
static int vgpu_mm_switch(struct i915_hw_ppgtt *ppgtt,
|
|
|
|
struct intel_engine_cs *ring)
|
|
|
|
{
|
|
|
|
struct drm_i915_private *dev_priv = to_i915(ppgtt->base.dev);
|
|
|
|
|
|
|
|
I915_WRITE(RING_PP_DIR_DCLV(ring), PP_DIR_DCLV_2G);
|
|
|
|
I915_WRITE(RING_PP_DIR_BASE(ring), get_pd_offset(ppgtt));
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2013-12-07 06:11:11 +08:00
|
|
|
static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt,
|
2014-08-16 01:51:35 +08:00
|
|
|
struct intel_engine_cs *ring)
|
2013-12-07 06:11:11 +08:00
|
|
|
{
|
|
|
|
int ret;
|
|
|
|
|
|
|
|
/* NB: TLBs must be flushed and invalidated before a switch */
|
|
|
|
ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
|
|
|
|
if (ret)
|
|
|
|
return ret;
|
|
|
|
|
|
|
|
ret = intel_ring_begin(ring, 6);
|
|
|
|
if (ret)
|
|
|
|
return ret;
|
|
|
|
|
|
|
|
intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(2));
|
|
|
|
intel_ring_emit(ring, RING_PP_DIR_DCLV(ring));
|
|
|
|
intel_ring_emit(ring, PP_DIR_DCLV_2G);
|
|
|
|
intel_ring_emit(ring, RING_PP_DIR_BASE(ring));
|
|
|
|
intel_ring_emit(ring, get_pd_offset(ppgtt));
|
|
|
|
intel_ring_emit(ring, MI_NOOP);
|
|
|
|
intel_ring_advance(ring);
|
|
|
|
|
2013-12-07 06:11:12 +08:00
|
|
|
/* XXX: RCS is the only one to auto invalidate the TLBs? */
|
|
|
|
if (ring->id != RCS) {
|
|
|
|
ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
|
|
|
|
if (ret)
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2013-12-07 06:11:11 +08:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2013-12-07 06:11:10 +08:00
|
|
|
static int gen6_mm_switch(struct i915_hw_ppgtt *ppgtt,
|
2014-08-16 01:51:35 +08:00
|
|
|
struct intel_engine_cs *ring)
|
2013-12-07 06:11:10 +08:00
|
|
|
{
|
|
|
|
struct drm_device *dev = ppgtt->base.dev;
|
|
|
|
struct drm_i915_private *dev_priv = dev->dev_private;
|
|
|
|
|
2013-12-07 06:11:11 +08:00
|
|
|
|
2013-12-07 06:11:10 +08:00
|
|
|
I915_WRITE(RING_PP_DIR_DCLV(ring), PP_DIR_DCLV_2G);
|
|
|
|
I915_WRITE(RING_PP_DIR_BASE(ring), get_pd_offset(ppgtt));
|
|
|
|
|
|
|
|
POSTING_READ(RING_PP_DIR_DCLV(ring));
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2014-08-07 02:19:53 +08:00
|
|
|
static void gen8_ppgtt_enable(struct drm_device *dev)
|
2013-12-07 06:11:10 +08:00
|
|
|
{
|
|
|
|
struct drm_i915_private *dev_priv = dev->dev_private;
|
2014-05-22 21:13:33 +08:00
|
|
|
struct intel_engine_cs *ring;
|
2014-08-07 02:19:53 +08:00
|
|
|
int j;
|
2013-04-24 14:15:32 +08:00
|
|
|
|
2013-12-07 06:11:10 +08:00
|
|
|
for_each_ring(ring, dev_priv, j) {
|
|
|
|
I915_WRITE(RING_MODE_GEN7(ring),
|
|
|
|
_MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
|
|
|
|
}
|
|
|
|
}
|
2013-04-09 09:43:54 +08:00
|
|
|
|
2014-08-07 02:19:53 +08:00
|
|
|
static void gen7_ppgtt_enable(struct drm_device *dev)
|
2013-04-24 14:15:32 +08:00
|
|
|
{
|
2014-03-31 19:27:21 +08:00
|
|
|
struct drm_i915_private *dev_priv = dev->dev_private;
|
2014-05-22 21:13:33 +08:00
|
|
|
struct intel_engine_cs *ring;
|
2013-12-07 06:11:09 +08:00
|
|
|
uint32_t ecochk, ecobits;
|
2013-04-24 14:15:32 +08:00
|
|
|
int i;
|
2013-04-09 09:43:54 +08:00
|
|
|
|
2013-12-07 06:11:09 +08:00
|
|
|
ecobits = I915_READ(GAC_ECO_BITS);
|
|
|
|
I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_PPGTT_CACHE64B);
|
2013-04-04 20:13:41 +08:00
|
|
|
|
2013-12-07 06:11:09 +08:00
|
|
|
ecochk = I915_READ(GAM_ECOCHK);
|
|
|
|
if (IS_HASWELL(dev)) {
|
|
|
|
ecochk |= ECOCHK_PPGTT_WB_HSW;
|
|
|
|
} else {
|
|
|
|
ecochk |= ECOCHK_PPGTT_LLC_IVB;
|
|
|
|
ecochk &= ~ECOCHK_PPGTT_GFDT_IVB;
|
|
|
|
}
|
|
|
|
I915_WRITE(GAM_ECOCHK, ecochk);
|
2013-04-04 20:13:41 +08:00
|
|
|
|
2013-12-07 06:11:09 +08:00
|
|
|
for_each_ring(ring, dev_priv, i) {
|
2013-04-09 09:43:54 +08:00
|
|
|
/* GFX_MODE is per-ring on gen7+ */
|
2013-12-07 06:11:09 +08:00
|
|
|
I915_WRITE(RING_MODE_GEN7(ring),
|
|
|
|
_MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
|
2013-04-09 09:43:54 +08:00
|
|
|
}
|
2013-12-07 06:11:09 +08:00
|
|
|
}
|
2013-04-09 09:43:54 +08:00
|
|
|
|
2014-08-07 02:19:53 +08:00
|
|
|
static void gen6_ppgtt_enable(struct drm_device *dev)
|
2013-12-07 06:11:09 +08:00
|
|
|
{
|
2014-03-31 19:27:21 +08:00
|
|
|
struct drm_i915_private *dev_priv = dev->dev_private;
|
2013-12-07 06:11:09 +08:00
|
|
|
uint32_t ecochk, gab_ctl, ecobits;
|
2013-04-04 20:13:41 +08:00
|
|
|
|
2013-12-07 06:11:09 +08:00
|
|
|
ecobits = I915_READ(GAC_ECO_BITS);
|
|
|
|
I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_SNB_BIT |
|
|
|
|
ECOBITS_PPGTT_CACHE64B);
|
2013-04-09 09:43:54 +08:00
|
|
|
|
2013-12-07 06:11:09 +08:00
|
|
|
gab_ctl = I915_READ(GAB_CTL);
|
|
|
|
I915_WRITE(GAB_CTL, gab_ctl | GAB_CTL_CONT_AFTER_PAGEFAULT);
|
|
|
|
|
|
|
|
ecochk = I915_READ(GAM_ECOCHK);
|
|
|
|
I915_WRITE(GAM_ECOCHK, ecochk | ECOCHK_SNB_BIT | ECOCHK_PPGTT_CACHE64B);
|
|
|
|
|
|
|
|
I915_WRITE(GFX_MODE, _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
|
2013-04-09 09:43:54 +08:00
|
|
|
}
|
|
|
|
|
2012-02-10 00:15:46 +08:00
|
|
|
/* PPGTT support for Sandybdrige/Gen6 and later */
|
2013-07-17 07:50:05 +08:00
|
|
|
static void gen6_ppgtt_clear_range(struct i915_address_space *vm,
|
2014-02-21 03:50:33 +08:00
|
|
|
uint64_t start,
|
|
|
|
uint64_t length,
|
2013-10-17 00:21:30 +08:00
|
|
|
bool use_scratch)
|
2012-02-10 00:15:46 +08:00
|
|
|
{
|
2013-07-17 07:50:05 +08:00
|
|
|
struct i915_hw_ppgtt *ppgtt =
|
|
|
|
container_of(vm, struct i915_hw_ppgtt, base);
|
2015-03-17 00:00:54 +08:00
|
|
|
gen6_pte_t *pt_vaddr, scratch_pte;
|
2014-02-21 03:50:33 +08:00
|
|
|
unsigned first_entry = start >> PAGE_SHIFT;
|
|
|
|
unsigned num_entries = length >> PAGE_SHIFT;
|
2015-03-17 00:00:54 +08:00
|
|
|
unsigned act_pt = first_entry / GEN6_PTES;
|
|
|
|
unsigned first_pte = first_entry % GEN6_PTES;
|
2012-02-10 00:15:47 +08:00
|
|
|
unsigned last_pte, i;
|
2012-02-10 00:15:46 +08:00
|
|
|
|
2014-06-17 13:29:42 +08:00
|
|
|
scratch_pte = vm->pte_encode(vm->scratch.addr, I915_CACHE_LLC, true, 0);
|
2012-02-10 00:15:46 +08:00
|
|
|
|
2012-02-10 00:15:47 +08:00
|
|
|
while (num_entries) {
|
|
|
|
last_pte = first_pte + num_entries;
|
2015-03-17 00:00:54 +08:00
|
|
|
if (last_pte > GEN6_PTES)
|
|
|
|
last_pte = GEN6_PTES;
|
2012-02-10 00:15:47 +08:00
|
|
|
|
drm/i915: Create page table allocators
As we move toward dynamic page table allocation, it becomes much easier
to manage our data structures if break do things less coarsely by
breaking up all of our actions into individual tasks. This makes the
code easier to write, read, and verify.
Aside from the dissection of the allocation functions, the patch
statically allocates the page table structures without a page directory.
This remains the same for all platforms,
The patch itself should not have much functional difference. The primary
noticeable difference is the fact that page tables are no longer
allocated, but rather statically declared as part of the page directory.
This has non-zero overhead, but things gain additional complexity as a
result.
This patch exists for a few reasons:
1. Splitting out the functions allows easily combining GEN6 and GEN8
code. Page tables have no difference based on GEN8. As we'll see in a
future patch when we add the DMA mappings to the allocations, it
requires only one small change to make work, and error handling should
just fall into place.
2. Unless we always want to allocate all page tables under a given PDE,
we'll have to eventually break this up into an array of pointers (or
pointer to pointer).
3. Having the discrete functions is easier to review, and understand.
All allocations and frees now take place in just a couple of locations.
Reviewing, and catching leaks should be easy.
4. Less important: the GFP flags are confined to one location, which
makes playing around with such things trivial.
v2: Updated commit message to explain why this patch exists
v3: For lrc, s/pdp.page_directory[i].daddr/pdp.page_directory[i]->daddr/
v4: Renamed free_pt/pd_single functions to unmap_and_free_pt/pd (Daniel)
v5: Added additional safety checks in gen8 clear/free/unmap.
v6: Use WARN_ON and return -EINVAL in alloc_pt_range (Mika).
v7: Make err_out loop symmetrical to the way we allocate in
alloc_pt_range. Also s/page_tables/page_table and correct commit
message (Mika)
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Michel Thierry <michel.thierry@intel.com> (v3+)
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2015-02-25 00:22:36 +08:00
|
|
|
pt_vaddr = kmap_atomic(ppgtt->pd.page_table[act_pt]->page);
|
2012-02-10 00:15:46 +08:00
|
|
|
|
2012-02-10 00:15:47 +08:00
|
|
|
for (i = first_pte; i < last_pte; i++)
|
|
|
|
pt_vaddr[i] = scratch_pte;
|
2012-02-10 00:15:46 +08:00
|
|
|
|
|
|
|
kunmap_atomic(pt_vaddr);
|
|
|
|
|
2012-02-10 00:15:47 +08:00
|
|
|
num_entries -= last_pte - first_pte;
|
|
|
|
first_pte = 0;
|
2013-03-20 06:48:39 +08:00
|
|
|
act_pt++;
|
2012-02-10 00:15:47 +08:00
|
|
|
}
|
2012-02-10 00:15:46 +08:00
|
|
|
}
|
|
|
|
|
2013-07-17 07:50:05 +08:00
|
|
|
static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
|
2013-01-25 06:44:56 +08:00
|
|
|
struct sg_table *pages,
|
2014-02-21 03:50:33 +08:00
|
|
|
uint64_t start,
|
2014-06-17 13:29:42 +08:00
|
|
|
enum i915_cache_level cache_level, u32 flags)
|
2013-01-25 06:44:56 +08:00
|
|
|
{
|
2013-07-17 07:50:05 +08:00
|
|
|
struct i915_hw_ppgtt *ppgtt =
|
|
|
|
container_of(vm, struct i915_hw_ppgtt, base);
|
2015-03-17 00:00:54 +08:00
|
|
|
gen6_pte_t *pt_vaddr;
|
2014-02-21 03:50:33 +08:00
|
|
|
unsigned first_entry = start >> PAGE_SHIFT;
|
2015-03-17 00:00:54 +08:00
|
|
|
unsigned act_pt = first_entry / GEN6_PTES;
|
|
|
|
unsigned act_pte = first_entry % GEN6_PTES;
|
2013-02-19 01:28:04 +08:00
|
|
|
struct sg_page_iter sg_iter;
|
|
|
|
|
2013-12-31 23:50:30 +08:00
|
|
|
pt_vaddr = NULL;
|
2013-02-19 01:28:04 +08:00
|
|
|
for_each_sg_page(pages->sgl, &sg_iter, pages->nents, 0) {
|
2013-12-31 23:50:30 +08:00
|
|
|
if (pt_vaddr == NULL)
|
drm/i915: Create page table allocators
As we move toward dynamic page table allocation, it becomes much easier
to manage our data structures if break do things less coarsely by
breaking up all of our actions into individual tasks. This makes the
code easier to write, read, and verify.
Aside from the dissection of the allocation functions, the patch
statically allocates the page table structures without a page directory.
This remains the same for all platforms,
The patch itself should not have much functional difference. The primary
noticeable difference is the fact that page tables are no longer
allocated, but rather statically declared as part of the page directory.
This has non-zero overhead, but things gain additional complexity as a
result.
This patch exists for a few reasons:
1. Splitting out the functions allows easily combining GEN6 and GEN8
code. Page tables have no difference based on GEN8. As we'll see in a
future patch when we add the DMA mappings to the allocations, it
requires only one small change to make work, and error handling should
just fall into place.
2. Unless we always want to allocate all page tables under a given PDE,
we'll have to eventually break this up into an array of pointers (or
pointer to pointer).
3. Having the discrete functions is easier to review, and understand.
All allocations and frees now take place in just a couple of locations.
Reviewing, and catching leaks should be easy.
4. Less important: the GFP flags are confined to one location, which
makes playing around with such things trivial.
v2: Updated commit message to explain why this patch exists
v3: For lrc, s/pdp.page_directory[i].daddr/pdp.page_directory[i]->daddr/
v4: Renamed free_pt/pd_single functions to unmap_and_free_pt/pd (Daniel)
v5: Added additional safety checks in gen8 clear/free/unmap.
v6: Use WARN_ON and return -EINVAL in alloc_pt_range (Mika).
v7: Make err_out loop symmetrical to the way we allocate in
alloc_pt_range. Also s/page_tables/page_table and correct commit
message (Mika)
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Michel Thierry <michel.thierry@intel.com> (v3+)
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2015-02-25 00:22:36 +08:00
|
|
|
pt_vaddr = kmap_atomic(ppgtt->pd.page_table[act_pt]->page);
|
2013-02-19 01:28:04 +08:00
|
|
|
|
2013-12-31 23:50:30 +08:00
|
|
|
pt_vaddr[act_pte] =
|
|
|
|
vm->pte_encode(sg_page_iter_dma_address(&sg_iter),
|
2014-06-17 13:29:42 +08:00
|
|
|
cache_level, true, flags);
|
|
|
|
|
2015-03-17 00:00:54 +08:00
|
|
|
if (++act_pte == GEN6_PTES) {
|
2013-02-19 01:28:04 +08:00
|
|
|
kunmap_atomic(pt_vaddr);
|
2013-12-31 23:50:30 +08:00
|
|
|
pt_vaddr = NULL;
|
2013-03-20 06:48:39 +08:00
|
|
|
act_pt++;
|
2013-02-19 01:28:04 +08:00
|
|
|
act_pte = 0;
|
2013-01-25 06:44:56 +08:00
|
|
|
}
|
|
|
|
}
|
2013-12-31 23:50:30 +08:00
|
|
|
if (pt_vaddr)
|
|
|
|
kunmap_atomic(pt_vaddr);
|
2013-01-25 06:44:56 +08:00
|
|
|
}
|
|
|
|
|
drm/i915: Track page table reload need
This patch was formerly known as, "Force pd restore when PDEs change,
gen6-7." I had to change the name because it is needed for GEN8 too.
The real issue this is trying to solve is when a new object is mapped
into the current address space. The GPU does not snoop the new mapping
so we must do the gen specific action to reload the page tables.
GEN8 and GEN7 do differ in the way they load page tables for the RCS.
GEN8 does so with the context restore, while GEN7 requires the proper
load commands in the command streamer. Non-render is similar for both.
Caveat for GEN7
The docs say you cannot change the PDEs of a currently running context.
We never map new PDEs of a running context, and expect them to be
present - so I think this is okay. (We can unmap, but this should also
be okay since we only unmap unreferenced objects that the GPU shouldn't
be trying to va->pa xlate.) The MI_SET_CONTEXT command does have a flag
to signal that even if the context is the same, force a reload. It's
unclear exactly what this does, but I have a hunch it's the right thing
to do.
The logic assumes that we always emit a context switch after mapping new
PDEs, and before we submit a batch. This is the case today, and has been
the case since the inception of hardware contexts. A note in the comment
lets the user know.
It's not just for gen8. If the current context has mappings change, we
need a context reload to switch
v2: Rebased after ppgtt clean up patches. Split the warning for aliasing
and true ppgtt options. And do not break aliasing ppgtt, where to->ppgtt
is always null.
v3: Invalidate PPGTT TLBs inside alloc_va_range.
v4: Rename ppgtt_invalidate_tlbs to mark_tlbs_dirty and move
pd_dirty_rings from i915_address_space to i915_hw_ppgtt. Fixes when
neither ctx->ppgtt and aliasing_ppgtt exist.
v5: Removed references to teardown_va_range.
v6: Updated needs_pd_load_pre/post.
v7: Fix pd_dirty_rings check in needs_pd_load_post, and update/move
comment about updated PDEs to object_pin/bind (Mika).
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Michel Thierry <michel.thierry@intel.com> (v2+)
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2015-03-19 20:53:28 +08:00
|
|
|
/* PDE TLBs are a pain invalidate pre GEN8. It requires a context reload. If we
|
|
|
|
* are switching between contexts with the same LRCA, we also must do a force
|
|
|
|
* restore.
|
|
|
|
*/
|
2015-04-14 23:35:26 +08:00
|
|
|
static void mark_tlbs_dirty(struct i915_hw_ppgtt *ppgtt)
|
drm/i915: Track page table reload need
This patch was formerly known as, "Force pd restore when PDEs change,
gen6-7." I had to change the name because it is needed for GEN8 too.
The real issue this is trying to solve is when a new object is mapped
into the current address space. The GPU does not snoop the new mapping
so we must do the gen specific action to reload the page tables.
GEN8 and GEN7 do differ in the way they load page tables for the RCS.
GEN8 does so with the context restore, while GEN7 requires the proper
load commands in the command streamer. Non-render is similar for both.
Caveat for GEN7
The docs say you cannot change the PDEs of a currently running context.
We never map new PDEs of a running context, and expect them to be
present - so I think this is okay. (We can unmap, but this should also
be okay since we only unmap unreferenced objects that the GPU shouldn't
be tryingto va->pa xlate.) The MI_SET_CONTEXT command does have a flag
to signal that even if the context is the same, force a reload. It's
unclear exactly what this does, but I have a hunch it's the right thing
to do.
The logic assumes that we always emit a context switch after mapping new
PDEs, and before we submit a batch. This is the case today, and has been
the case since the inception of hardware contexts. A note in the comment
let's the user know.
It's not just for gen8. If the current context has mappings change, we
need a context reload to switch
v2: Rebased after ppgtt clean up patches. Split the warning for aliasing
and true ppgtt options. And do not break aliasing ppgtt, where to->ppgtt
is always null.
v3: Invalidate PPGTT TLBs inside alloc_va_range.
v4: Rename ppgtt_invalidate_tlbs to mark_tlbs_dirty and move
pd_dirty_rings from i915_address_space to i915_hw_ppgtt. Fixes when
neither ctx->ppgtt and aliasing_ppgtt exist.
v5: Removed references to teardown_va_range.
v6: Updated needs_pd_load_pre/post.
v7: Fix pd_dirty_rings check in needs_pd_load_post, and update/move
comment about updated PDEs to object_pin/bind (Mika).
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Michel Thierry <michel.thierry@intel.com> (v2+)
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2015-03-19 20:53:28 +08:00
|
|
|
{
|
|
|
|
/* If current vm != vm, */
|
|
|
|
ppgtt->pd_dirty_rings = INTEL_INFO(ppgtt->base.dev)->ring_mask;
|
|
|
|
}
|
|
|
|
|
drm/i915: Finish gen6/7 dynamic page table allocation
This patch continues on the idea from "Track GEN6 page table usage".
From here on, in the steady state, PDEs are all pointing to the scratch
page table (as recommended in the spec). When an object is allocated in
the VA range, the code will determine if we need to allocate a page for
the page table. Similarly when the object is destroyed, we will remove,
and free the page table pointing the PDE back to the scratch page.
Following patches will work to unify the code a bit as we bring in GEN8
support. GEN6 and GEN8 are different enough that I had a hard time to
get to this point with as much common code as I do.
The aliasing PPGTT must pre-allocate all of the page tables. There are a
few reasons for this. Two trivial ones: aliasing ppgtt goes through the
ggtt paths, so it's hard to maintain, we currently do not restore the
default context (assuming the previous force reload is indeed
necessary). Most importantly though, the only way (it seems from
empirical evidence) to invalidate the CS TLBs on non-render ring is to
either use ring sync (which requires actually stopping the rings in
order to synchronize when the sync completes vs. where you are in
execution), or to reload DCLV. Since without full PPGTT we do not ever
reload the DCLV register, there is no good way to achieve this. The
simplest solution is just to not support dynamic page table
creation/destruction in the aliasing PPGTT.
We could always reload DCLV, but this seems like quite a bit of excess
overhead only to save at most 2MB-4k of memory for the aliasing PPGTT
page tables.
v2: Make the page table bitmap declared inside the function (Chris)
Simplify the way scratching address space works.
Move the alloc/teardown tracepoints up a level in the call stack so that
all implementations get the trace.
v3: Updated trace event to spit out a name
v4: Aliasing ppgtt is now initialized differently (in setup global gtt)
v5: Rebase to latest code. Also removed unnecessary aliasing ppgtt check
for trace, as it is no longer possible after the PPGTT cleanup patch series
of a couple of months ago (Daniel).
v6: Implement changes from code review (Daniel):
- allocate/teardown_va_range calls added.
- Add a scratch page allocation helper (only need the address).
- Move trace events to a new patch.
- Use updated mark_tlbs_dirty.
- Moved pt preallocation for aliasing ppgtt into gen6_ppgtt_init.
v7: teardown_va_range removed (Daniel).
In init, gen6_ppgtt_clear_range call is only needed for aliasing ppgtt.
v8: Rebase after s/page_tables/page_table/.
v9: Remove unnecessary scratch flag in page_table struct, future patches
can just compare against ppgtt->scratch_pt, and alloc_pt_scratch becomes
redundant. Initialize scratch_pt and pt. (Mika)
v10: Clean up aliasing ppgtt init error path and prevent leaking the
ppgtt obj when init fails. (Mika)
Updated commit author. (Daniel)
Cc: Mika Kuoppala <mika.kuoppala@intel.com>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Michel Thierry <michel.thierry@intel.com> (v4+)
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2015-03-24 23:46:22 +08:00
|
|
|
static void gen6_initialize_pt(struct i915_address_space *vm,
|
2015-04-08 19:13:23 +08:00
|
|
|
struct i915_page_table *pt)
|
drm/i915: Finish gen6/7 dynamic page table allocation
This patch continues on the idea from "Track GEN6 page table usage".
From here on, in the steady state, PDEs are all pointing to the scratch
page table (as recommended in the spec). When an object is allocated in
the VA range, the code will determine if we need to allocate a page for
the page table. Similarly when the object is destroyed, we will remove,
and free the page table pointing the PDE back to the scratch page.
Following patches will work to unify the code a bit as we bring in GEN8
support. GEN6 and GEN8 are different enough that I had a hard time to
get to this point with as much common code as I do.
The aliasing PPGTT must pre-allocate all of the page tables. There are a
few reasons for this. Two trivial ones: aliasing ppgtt goes through the
ggtt paths, so it's hard to maintain, we currently do not restore the
default context (assuming the previous force reload is indeed
necessary). Most importantly though, the only way (it seems from
empirical evidence) to invalidate the CS TLBs on non-render ring is to
either use ring sync (which requires actually stopping the rings in
order to synchronize when the sync completes vs. where you are in
execution), or to reload DCLV. Since without full PPGTT we do not ever
reload the DCLV register, there is no good way to achieve this. The
simplest solution is just to not support dynamic page table
creation/destruction in the aliasing PPGTT.
We could always reload DCLV, but this seems like quite a bit of excess
overhead only to save at most 2MB-4k of memory for the aliasing PPGTT
page tables.
v2: Make the page table bitmap declared inside the function (Chris)
Simplify the way scratching address space works.
Move the alloc/teardown tracepoints up a level in the call stack so that
both all implementations get the trace.
v3: Updated trace event to spit out a name
v4: Aliasing ppgtt is now initialized differently (in setup global gtt)
v5: Rebase to latest code. Also removed unnecessary aliasing ppgtt check
for trace, as it is no longer possible after the PPGTT cleanup patch series
of a couple of months ago (Daniel).
v6: Implement changes from code review (Daniel):
- allocate/teardown_va_range calls added.
- Add a scratch page allocation helper (only need the address).
- Move trace events to a new patch.
- Use updated mark_tlbs_dirty.
- Moved pt preallocation for aliasing ppgtt into gen6_ppgtt_init.
v7: teardown_va_range removed (Daniel).
In init, gen6_ppgtt_clear_range call is only needed for aliasing ppgtt.
v8: Rebase after s/page_tables/page_table/.
v9: Remove unnecessary scratch flag in page_table struct, future patches
can just compare against ppgtt->scratch_pt, and alloc_pt_scratch becomes
redundant. Initialize scratch_pt and pt. (Mika)
v10: Clean up aliasing ppgtt init error path and prevent leaking the
ppgtt obj when init fails. (Mika)
Updated commit author. (Daniel)
Cc: Mika Kuoppala <mika.kuoppala@intel.com>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Michel Thierry <michel.thierry@intel.com> (v4+)
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2015-03-24 23:46:22 +08:00
|
|
|
{
|
|
|
|
gen6_pte_t *pt_vaddr, scratch_pte;
|
|
|
|
int i;
|
|
|
|
|
|
|
|
WARN_ON(vm->scratch.addr == 0);
|
|
|
|
|
|
|
|
scratch_pte = vm->pte_encode(vm->scratch.addr,
|
|
|
|
I915_CACHE_LLC, true, 0);
|
|
|
|
|
|
|
|
pt_vaddr = kmap_atomic(pt->page);
|
|
|
|
|
|
|
|
for (i = 0; i < GEN6_PTES; i++)
|
|
|
|
pt_vaddr[i] = scratch_pte;
|
|
|
|
|
|
|
|
kunmap_atomic(pt_vaddr);
|
|
|
|
}
|
|
|
|
|
2015-03-17 00:00:56 +08:00
|
|
|
static int gen6_alloc_va_range(struct i915_address_space *vm,
|
|
|
|
uint64_t start, uint64_t length)
|
|
|
|
{
|
drm/i915: Finish gen6/7 dynamic page table allocation
This patch continues on the idea from "Track GEN6 page table usage".
From here on, in the steady state, PDEs are all pointing to the scratch
page table (as recommended in the spec). When an object is allocated in
the VA range, the code will determine if we need to allocate a page for
the page table. Similarly when the object is destroyed, we will remove,
and free the page table pointing the PDE back to the scratch page.
Following patches will work to unify the code a bit as we bring in GEN8
support. GEN6 and GEN8 are different enough that I had a hard time to
get to this point with as much common code as I do.
The aliasing PPGTT must pre-allocate all of the page tables. There are a
few reasons for this. Two trivial ones: aliasing ppgtt goes through the
ggtt paths, so it's hard to maintain, we currently do not restore the
default context (assuming the previous force reload is indeed
necessary). Most importantly though, the only way (it seems from
empirical evidence) to invalidate the CS TLBs on non-render ring is to
either use ring sync (which requires actually stopping the rings in
order to synchronize when the sync completes vs. where you are in
execution), or to reload DCLV. Since without full PPGTT we do not ever
reload the DCLV register, there is no good way to achieve this. The
simplest solution is just to not support dynamic page table
creation/destruction in the aliasing PPGTT.
We could always reload DCLV, but this seems like quite a bit of excess
overhead only to save at most 2MB-4k of memory for the aliasing PPGTT
page tables.
v2: Make the page table bitmap declared inside the function (Chris)
Simplify the way scratching address space works.
Move the alloc/teardown tracepoints up a level in the call stack so that
both all implementations get the trace.
v3: Updated trace event to spit out a name
v4: Aliasing ppgtt is now initialized differently (in setup global gtt)
v5: Rebase to latest code. Also removed unnecessary aliasing ppgtt check
for trace, as it is no longer possible after the PPGTT cleanup patch series
of a couple of months ago (Daniel).
v6: Implement changes from code review (Daniel):
- allocate/teardown_va_range calls added.
- Add a scratch page allocation helper (only need the address).
- Move trace events to a new patch.
- Use updated mark_tlbs_dirty.
- Moved pt preallocation for aliasing ppgtt into gen6_ppgtt_init.
v7: teardown_va_range removed (Daniel).
In init, gen6_ppgtt_clear_range call is only needed for aliasing ppgtt.
v8: Rebase after s/page_tables/page_table/.
v9: Remove unnecessary scratch flag in page_table struct, future patches
can just compare against ppgtt->scratch_pt, and alloc_pt_scratch becomes
redundant. Initialize scratch_pt and pt. (Mika)
v10: Clean up aliasing ppgtt init error path and prevent leaking the
ppgtt obj when init fails. (Mika)
Updated commit author. (Daniel)
Cc: Mika Kuoppala <mika.kuoppala@intel.com>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Michel Thierry <michel.thierry@intel.com> (v4+)
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2015-03-24 23:46:22 +08:00
|
|
|
DECLARE_BITMAP(new_page_tables, I915_PDES);
|
|
|
|
struct drm_device *dev = vm->dev;
|
|
|
|
struct drm_i915_private *dev_priv = dev->dev_private;
|
2015-03-17 00:00:56 +08:00
|
|
|
struct i915_hw_ppgtt *ppgtt =
|
|
|
|
container_of(vm, struct i915_hw_ppgtt, base);
|
2015-04-08 19:13:23 +08:00
|
|
|
struct i915_page_table *pt;
|
drm/i915: Finish gen6/7 dynamic page table allocation
This patch continues on the idea from "Track GEN6 page table usage".
From here on, in the steady state, PDEs are all pointing to the scratch
page table (as recommended in the spec). When an object is allocated in
the VA range, the code will determine if we need to allocate a page for
the page table. Similarly when the object is destroyed, we will remove,
and free the page table pointing the PDE back to the scratch page.
Following patches will work to unify the code a bit as we bring in GEN8
support. GEN6 and GEN8 are different enough that I had a hard time to
get to this point with as much common code as I do.
The aliasing PPGTT must pre-allocate all of the page tables. There are a
few reasons for this. Two trivial ones: aliasing ppgtt goes through the
ggtt paths, so it's hard to maintain, we currently do not restore the
default context (assuming the previous force reload is indeed
necessary). Most importantly though, the only way (it seems from
empirical evidence) to invalidate the CS TLBs on non-render ring is to
either use ring sync (which requires actually stopping the rings in
order to synchronize when the sync completes vs. where you are in
execution), or to reload DCLV. Since without full PPGTT we do not ever
reload the DCLV register, there is no good way to achieve this. The
simplest solution is just to not support dynamic page table
creation/destruction in the aliasing PPGTT.
We could always reload DCLV, but this seems like quite a bit of excess
overhead only to save at most 2MB-4k of memory for the aliasing PPGTT
page tables.
v2: Make the page table bitmap declared inside the function (Chris)
Simplify the way scratching address space works.
Move the alloc/teardown tracepoints up a level in the call stack so that
both all implementations get the trace.
v3: Updated trace event to spit out a name
v4: Aliasing ppgtt is now initialized differently (in setup global gtt)
v5: Rebase to latest code. Also removed unnecessary aliasing ppgtt check
for trace, as it is no longer possible after the PPGTT cleanup patch series
of a couple of months ago (Daniel).
v6: Implement changes from code review (Daniel):
- allocate/teardown_va_range calls added.
- Add a scratch page allocation helper (only need the address).
- Move trace events to a new patch.
- Use updated mark_tlbs_dirty.
- Moved pt preallocation for aliasing ppgtt into gen6_ppgtt_init.
v7: teardown_va_range removed (Daniel).
In init, gen6_ppgtt_clear_range call is only needed for aliasing ppgtt.
v8: Rebase after s/page_tables/page_table/.
v9: Remove unnecessary scratch flag in page_table struct, future patches
can just compare against ppgtt->scratch_pt, and alloc_pt_scratch becomes
redundant. Initialize scratch_pt and pt. (Mika)
v10: Clean up aliasing ppgtt init error path and prevent leaking the
ppgtt obj when init fails. (Mika)
Updated commit author. (Daniel)
Cc: Mika Kuoppala <mika.kuoppala@intel.com>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Michel Thierry <michel.thierry@intel.com> (v4+)
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2015-03-24 23:46:22 +08:00
|
|
|
const uint32_t start_save = start, length_save = length;
|
2015-03-17 00:00:56 +08:00
|
|
|
uint32_t pde, temp;
|
drm/i915: Finish gen6/7 dynamic page table allocation
This patch continues on the idea from "Track GEN6 page table usage".
From here on, in the steady state, PDEs are all pointing to the scratch
page table (as recommended in the spec). When an object is allocated in
the VA range, the code will determine if we need to allocate a page for
the page table. Similarly when the object is destroyed, we will remove,
and free the page table pointing the PDE back to the scratch page.
Following patches will work to unify the code a bit as we bring in GEN8
support. GEN6 and GEN8 are different enough that I had a hard time to
get to this point with as much common code as I do.
The aliasing PPGTT must pre-allocate all of the page tables. There are a
few reasons for this. Two trivial ones: aliasing ppgtt goes through the
ggtt paths, so it's hard to maintain, we currently do not restore the
default context (assuming the previous force reload is indeed
necessary). Most importantly though, the only way (it seems from
empirical evidence) to invalidate the CS TLBs on non-render ring is to
either use ring sync (which requires actually stopping the rings in
order to synchronize when the sync completes vs. where you are in
execution), or to reload DCLV. Since without full PPGTT we do not ever
reload the DCLV register, there is no good way to achieve this. The
simplest solution is just to not support dynamic page table
creation/destruction in the aliasing PPGTT.
We could always reload DCLV, but this seems like quite a bit of excess
overhead only to save at most 2MB-4k of memory for the aliasing PPGTT
page tables.
v2: Make the page table bitmap declared inside the function (Chris)
Simplify the way scratching address space works.
Move the alloc/teardown tracepoints up a level in the call stack so that
both all implementations get the trace.
v3: Updated trace event to spit out a name
v4: Aliasing ppgtt is now initialized differently (in setup global gtt)
v5: Rebase to latest code. Also removed unnecessary aliasing ppgtt check
for trace, as it is no longer possible after the PPGTT cleanup patch series
of a couple of months ago (Daniel).
v6: Implement changes from code review (Daniel):
- allocate/teardown_va_range calls added.
- Add a scratch page allocation helper (only need the address).
- Move trace events to a new patch.
- Use updated mark_tlbs_dirty.
- Moved pt preallocation for aliasing ppgtt into gen6_ppgtt_init.
v7: teardown_va_range removed (Daniel).
In init, gen6_ppgtt_clear_range call is only needed for aliasing ppgtt.
v8: Rebase after s/page_tables/page_table/.
v9: Remove unnecessary scratch flag in page_table struct, future patches
can just compare against ppgtt->scratch_pt, and alloc_pt_scratch becomes
redundant. Initialize scratch_pt and pt. (Mika)
v10: Clean up aliasing ppgtt init error path and prevent leaking the
ppgtt obj when init fails. (Mika)
Updated commit author. (Daniel)
Cc: Mika Kuoppala <mika.kuoppala@intel.com>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Michel Thierry <michel.thierry@intel.com> (v4+)
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2015-03-24 23:46:22 +08:00
|
|
|
int ret;
|
|
|
|
|
|
|
|
WARN_ON(upper_32_bits(start));
|
|
|
|
|
|
|
|
bitmap_zero(new_page_tables, I915_PDES);
|
|
|
|
|
|
|
|
/* The allocation is done in two stages so that we can bail out with
|
|
|
|
* minimal amount of pain. The first stage finds new page tables that
|
|
|
|
* need allocation. The second stage marks used ptes within the page
|
|
|
|
* tables.
|
|
|
|
*/
|
|
|
|
gen6_for_each_pde(pt, &ppgtt->pd, start, length, temp, pde) {
|
|
|
|
if (pt != ppgtt->scratch_pt) {
|
|
|
|
WARN_ON(bitmap_empty(pt->used_ptes, GEN6_PTES));
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* We've already allocated a page table */
|
|
|
|
WARN_ON(!bitmap_empty(pt->used_ptes, GEN6_PTES));
|
|
|
|
|
|
|
|
pt = alloc_pt_single(dev);
|
|
|
|
if (IS_ERR(pt)) {
|
|
|
|
ret = PTR_ERR(pt);
|
|
|
|
goto unwind_out;
|
|
|
|
}
|
|
|
|
|
|
|
|
gen6_initialize_pt(vm, pt);
|
|
|
|
|
|
|
|
ppgtt->pd.page_table[pde] = pt;
|
|
|
|
set_bit(pde, new_page_tables);
|
2015-03-24 23:46:23 +08:00
|
|
|
trace_i915_page_table_entry_alloc(vm, pde, start, GEN6_PDE_SHIFT);
|
drm/i915: Finish gen6/7 dynamic page table allocation
This patch continues on the idea from "Track GEN6 page table usage".
From here on, in the steady state, PDEs are all pointing to the scratch
page table (as recommended in the spec). When an object is allocated in
the VA range, the code will determine if we need to allocate a page for
the page table. Similarly when the object is destroyed, we will remove,
and free the page table pointing the PDE back to the scratch page.
Following patches will work to unify the code a bit as we bring in GEN8
support. GEN6 and GEN8 are different enough that I had a hard time to
get to this point with as much common code as I do.
The aliasing PPGTT must pre-allocate all of the page tables. There are a
few reasons for this. Two trivial ones: aliasing ppgtt goes through the
ggtt paths, so it's hard to maintain, we currently do not restore the
default context (assuming the previous force reload is indeed
necessary). Most importantly though, the only way (it seems from
empirical evidence) to invalidate the CS TLBs on non-render ring is to
either use ring sync (which requires actually stopping the rings in
order to synchronize when the sync completes vs. where you are in
execution), or to reload DCLV. Since without full PPGTT we do not ever
reload the DCLV register, there is no good way to achieve this. The
simplest solution is just to not support dynamic page table
creation/destruction in the aliasing PPGTT.
We could always reload DCLV, but this seems like quite a bit of excess
overhead only to save at most 2MB-4k of memory for the aliasing PPGTT
page tables.
v2: Make the page table bitmap declared inside the function (Chris)
Simplify the way scratching address space works.
Move the alloc/teardown tracepoints up a level in the call stack so that
both all implementations get the trace.
v3: Updated trace event to spit out a name
v4: Aliasing ppgtt is now initialized differently (in setup global gtt)
v5: Rebase to latest code. Also removed unnecessary aliasing ppgtt check
for trace, as it is no longer possible after the PPGTT cleanup patch series
of a couple of months ago (Daniel).
v6: Implement changes from code review (Daniel):
- allocate/teardown_va_range calls added.
- Add a scratch page allocation helper (only need the address).
- Move trace events to a new patch.
- Use updated mark_tlbs_dirty.
- Moved pt preallocation for aliasing ppgtt into gen6_ppgtt_init.
v7: teardown_va_range removed (Daniel).
In init, gen6_ppgtt_clear_range call is only needed for aliasing ppgtt.
v8: Rebase after s/page_tables/page_table/.
v9: Remove unnecessary scratch flag in page_table struct, future patches
can just compare against ppgtt->scratch_pt, and alloc_pt_scratch becomes
redundant. Initialize scratch_pt and pt. (Mika)
v10: Clean up aliasing ppgtt init error path and prevent leaking the
ppgtt obj when init fails. (Mika)
Updated commit author. (Daniel)
Cc: Mika Kuoppala <mika.kuoppala@intel.com>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Michel Thierry <michel.thierry@intel.com> (v4+)
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2015-03-24 23:46:22 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
start = start_save;
|
|
|
|
length = length_save;
|
2015-03-17 00:00:56 +08:00
|
|
|
|
|
|
|
gen6_for_each_pde(pt, &ppgtt->pd, start, length, temp, pde) {
|
|
|
|
DECLARE_BITMAP(tmp_bitmap, GEN6_PTES);
|
|
|
|
|
|
|
|
bitmap_zero(tmp_bitmap, GEN6_PTES);
|
|
|
|
bitmap_set(tmp_bitmap, gen6_pte_index(start),
|
|
|
|
gen6_pte_count(start, length));
|
|
|
|
|
drm/i915: Finish gen6/7 dynamic page table allocation
This patch continues on the idea from "Track GEN6 page table usage".
From here on, in the steady state, PDEs are all pointing to the scratch
page table (as recommended in the spec). When an object is allocated in
the VA range, the code will determine if we need to allocate a page for
the page table. Similarly when the object is destroyed, we will remove,
and free the page table pointing the PDE back to the scratch page.
Following patches will work to unify the code a bit as we bring in GEN8
support. GEN6 and GEN8 are different enough that I had a hard time to
get to this point with as much common code as I do.
The aliasing PPGTT must pre-allocate all of the page tables. There are a
few reasons for this. Two trivial ones: aliasing ppgtt goes through the
ggtt paths, so it's hard to maintain, we currently do not restore the
default context (assuming the previous force reload is indeed
necessary). Most importantly though, the only way (it seems from
empirical evidence) to invalidate the CS TLBs on non-render ring is to
either use ring sync (which requires actually stopping the rings in
order to synchronize when the sync completes vs. where you are in
execution), or to reload DCLV. Since without full PPGTT we do not ever
reload the DCLV register, there is no good way to achieve this. The
simplest solution is just to not support dynamic page table
creation/destruction in the aliasing PPGTT.
We could always reload DCLV, but this seems like quite a bit of excess
overhead only to save at most 2MB-4k of memory for the aliasing PPGTT
page tables.
v2: Make the page table bitmap declared inside the function (Chris)
Simplify the way scratching address space works.
Move the alloc/teardown tracepoints up a level in the call stack so that
all implementations get the trace.
v3: Updated trace event to spit out a name
v4: Aliasing ppgtt is now initialized differently (in setup global gtt)
v5: Rebase to latest code. Also removed unnecessary aliasing ppgtt check
for trace, as it is no longer possible after the PPGTT cleanup patch series
of a couple of months ago (Daniel).
v6: Implement changes from code review (Daniel):
- allocate/teardown_va_range calls added.
- Add a scratch page allocation helper (only need the address).
- Move trace events to a new patch.
- Use updated mark_tlbs_dirty.
- Moved pt preallocation for aliasing ppgtt into gen6_ppgtt_init.
v7: teardown_va_range removed (Daniel).
In init, gen6_ppgtt_clear_range call is only needed for aliasing ppgtt.
v8: Rebase after s/page_tables/page_table/.
v9: Remove unnecessary scratch flag in page_table struct, future patches
can just compare against ppgtt->scratch_pt, and alloc_pt_scratch becomes
redundant. Initialize scratch_pt and pt. (Mika)
v10: Clean up aliasing ppgtt init error path and prevent leaking the
ppgtt obj when init fails. (Mika)
Updated commit author. (Daniel)
Cc: Mika Kuoppala <mika.kuoppala@intel.com>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Michel Thierry <michel.thierry@intel.com> (v4+)
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2015-03-24 23:46:22 +08:00
|
|
|
if (test_and_clear_bit(pde, new_page_tables))
|
|
|
|
gen6_write_pde(&ppgtt->pd, pde, pt);
|
|
|
|
|
2015-03-24 23:46:23 +08:00
|
|
|
trace_i915_page_table_entry_map(vm, pde, pt,
|
|
|
|
gen6_pte_index(start),
|
|
|
|
gen6_pte_count(start, length),
|
|
|
|
GEN6_PTES);
|
drm/i915: Finish gen6/7 dynamic page table allocation
This patch continues on the idea from "Track GEN6 page table usage".
From here on, in the steady state, PDEs are all pointing to the scratch
page table (as recommended in the spec). When an object is allocated in
the VA range, the code will determine if we need to allocate a page for
the page table. Similarly when the object is destroyed, we will remove,
and free the page table pointing the PDE back to the scratch page.
Following patches will work to unify the code a bit as we bring in GEN8
support. GEN6 and GEN8 are different enough that I had a hard time to
get to this point with as much common code as I do.
The aliasing PPGTT must pre-allocate all of the page tables. There are a
few reasons for this. Two trivial ones: aliasing ppgtt goes through the
ggtt paths, so it's hard to maintain, we currently do not restore the
default context (assuming the previous force reload is indeed
necessary). Most importantly though, the only way (it seems from
empirical evidence) to invalidate the CS TLBs on non-render ring is to
either use ring sync (which requires actually stopping the rings in
order to synchronize when the sync completes vs. where you are in
execution), or to reload DCLV. Since without full PPGTT we do not ever
reload the DCLV register, there is no good way to achieve this. The
simplest solution is just to not support dynamic page table
creation/destruction in the aliasing PPGTT.
We could always reload DCLV, but this seems like quite a bit of excess
overhead only to save at most 2MB-4k of memory for the aliasing PPGTT
page tables.
v2: Make the page table bitmap declared inside the function (Chris)
Simplify the way scratching address space works.
Move the alloc/teardown tracepoints up a level in the call stack so that
all implementations get the trace.
v3: Updated trace event to spit out a name
v4: Aliasing ppgtt is now initialized differently (in setup global gtt)
v5: Rebase to latest code. Also removed unnecessary aliasing ppgtt check
for trace, as it is no longer possible after the PPGTT cleanup patch series
of a couple of months ago (Daniel).
v6: Implement changes from code review (Daniel):
- allocate/teardown_va_range calls added.
- Add a scratch page allocation helper (only need the address).
- Move trace events to a new patch.
- Use updated mark_tlbs_dirty.
- Moved pt preallocation for aliasing ppgtt into gen6_ppgtt_init.
v7: teardown_va_range removed (Daniel).
In init, gen6_ppgtt_clear_range call is only needed for aliasing ppgtt.
v8: Rebase after s/page_tables/page_table/.
v9: Remove unnecessary scratch flag in page_table struct, future patches
can just compare against ppgtt->scratch_pt, and alloc_pt_scratch becomes
redundant. Initialize scratch_pt and pt. (Mika)
v10: Clean up aliasing ppgtt init error path and prevent leaking the
ppgtt obj when init fails. (Mika)
Updated commit author. (Daniel)
Cc: Mika Kuoppala <mika.kuoppala@intel.com>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Michel Thierry <michel.thierry@intel.com> (v4+)
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2015-03-24 23:46:22 +08:00
|
|
|
bitmap_or(pt->used_ptes, tmp_bitmap, pt->used_ptes,
|
2015-03-17 00:00:56 +08:00
|
|
|
GEN6_PTES);
|
|
|
|
}
|
|
|
|
|
drm/i915: Finish gen6/7 dynamic page table allocation
This patch continues on the idea from "Track GEN6 page table usage".
From here on, in the steady state, PDEs are all pointing to the scratch
page table (as recommended in the spec). When an object is allocated in
the VA range, the code will determine if we need to allocate a page for
the page table. Similarly when the object is destroyed, we will remove,
and free the page table pointing the PDE back to the scratch page.
Following patches will work to unify the code a bit as we bring in GEN8
support. GEN6 and GEN8 are different enough that I had a hard time to
get to this point with as much common code as I do.
The aliasing PPGTT must pre-allocate all of the page tables. There are a
few reasons for this. Two trivial ones: aliasing ppgtt goes through the
ggtt paths, so it's hard to maintain, we currently do not restore the
default context (assuming the previous force reload is indeed
necessary). Most importantly though, the only way (it seems from
empirical evidence) to invalidate the CS TLBs on non-render ring is to
either use ring sync (which requires actually stopping the rings in
order to synchronize when the sync completes vs. where you are in
execution), or to reload DCLV. Since without full PPGTT we do not ever
reload the DCLV register, there is no good way to achieve this. The
simplest solution is just to not support dynamic page table
creation/destruction in the aliasing PPGTT.
We could always reload DCLV, but this seems like quite a bit of excess
overhead only to save at most 2MB-4k of memory for the aliasing PPGTT
page tables.
v2: Make the page table bitmap declared inside the function (Chris)
Simplify the way scratching address space works.
Move the alloc/teardown tracepoints up a level in the call stack so that
all implementations get the trace.
v3: Updated trace event to spit out a name
v4: Aliasing ppgtt is now initialized differently (in setup global gtt)
v5: Rebase to latest code. Also removed unnecessary aliasing ppgtt check
for trace, as it is no longer possible after the PPGTT cleanup patch series
of a couple of months ago (Daniel).
v6: Implement changes from code review (Daniel):
- allocate/teardown_va_range calls added.
- Add a scratch page allocation helper (only need the address).
- Move trace events to a new patch.
- Use updated mark_tlbs_dirty.
- Moved pt preallocation for aliasing ppgtt into gen6_ppgtt_init.
v7: teardown_va_range removed (Daniel).
In init, gen6_ppgtt_clear_range call is only needed for aliasing ppgtt.
v8: Rebase after s/page_tables/page_table/.
v9: Remove unnecessary scratch flag in page_table struct, future patches
can just compare against ppgtt->scratch_pt, and alloc_pt_scratch becomes
redundant. Initialize scratch_pt and pt. (Mika)
v10: Clean up aliasing ppgtt init error path and prevent leaking the
ppgtt obj when init fails. (Mika)
Updated commit author. (Daniel)
Cc: Mika Kuoppala <mika.kuoppala@intel.com>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Michel Thierry <michel.thierry@intel.com> (v4+)
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2015-03-24 23:46:22 +08:00
|
|
|
WARN_ON(!bitmap_empty(new_page_tables, I915_PDES));
|
|
|
|
|
|
|
|
/* Make sure write is complete before other code can use this page
|
|
|
|
* table. Also required for WC mapped PTEs */
|
|
|
|
readl(dev_priv->gtt.gsm);
|
|
|
|
|
drm/i915: Track page table reload need
This patch was formerly known as, "Force pd restore when PDEs change,
gen6-7." I had to change the name because it is needed for GEN8 too.
The real issue this is trying to solve is when a new object is mapped
into the current address space. The GPU does not snoop the new mapping
so we must do the gen specific action to reload the page tables.
GEN8 and GEN7 do differ in the way they load page tables for the RCS.
GEN8 does so with the context restore, while GEN7 requires the proper
load commands in the command streamer. Non-render is similar for both.
Caveat for GEN7
The docs say you cannot change the PDEs of a currently running context.
We never map new PDEs of a running context, and expect them to be
present - so I think this is okay. (We can unmap, but this should also
be okay since we only unmap unreferenced objects that the GPU shouldn't
be trying to va->pa xlate.) The MI_SET_CONTEXT command does have a flag
to signal that even if the context is the same, force a reload. It's
unclear exactly what this does, but I have a hunch it's the right thing
to do.
The logic assumes that we always emit a context switch after mapping new
PDEs, and before we submit a batch. This is the case today, and has been
the case since the inception of hardware contexts. A note in the comment
lets the user know.
It's not just for gen8. If the current context has mappings change, we
need a context reload to switch
v2: Rebased after ppgtt clean up patches. Split the warning for aliasing
and true ppgtt options. And do not break aliasing ppgtt, where to->ppgtt
is always null.
v3: Invalidate PPGTT TLBs inside alloc_va_range.
v4: Rename ppgtt_invalidate_tlbs to mark_tlbs_dirty and move
pd_dirty_rings from i915_address_space to i915_hw_ppgtt. Fixes when
neither ctx->ppgtt and aliasing_ppgtt exist.
v5: Removed references to teardown_va_range.
v6: Updated needs_pd_load_pre/post.
v7: Fix pd_dirty_rings check in needs_pd_load_post, and update/move
comment about updated PDEs to object_pin/bind (Mika).
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Michel Thierry <michel.thierry@intel.com> (v2+)
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2015-03-19 20:53:28 +08:00
|
|
|
mark_tlbs_dirty(ppgtt);
|
2015-03-17 00:00:56 +08:00
|
|
|
return 0;
|
drm/i915: Finish gen6/7 dynamic page table allocation
This patch continues on the idea from "Track GEN6 page table usage".
From here on, in the steady state, PDEs are all pointing to the scratch
page table (as recommended in the spec). When an object is allocated in
the VA range, the code will determine if we need to allocate a page for
the page table. Similarly when the object is destroyed, we will remove,
and free the page table pointing the PDE back to the scratch page.
Following patches will work to unify the code a bit as we bring in GEN8
support. GEN6 and GEN8 are different enough that I had a hard time to
get to this point with as much common code as I do.
The aliasing PPGTT must pre-allocate all of the page tables. There are a
few reasons for this. Two trivial ones: aliasing ppgtt goes through the
ggtt paths, so it's hard to maintain, we currently do not restore the
default context (assuming the previous force reload is indeed
necessary). Most importantly though, the only way (it seems from
empirical evidence) to invalidate the CS TLBs on non-render ring is to
either use ring sync (which requires actually stopping the rings in
order to synchronize when the sync completes vs. where you are in
execution), or to reload DCLV. Since without full PPGTT we do not ever
reload the DCLV register, there is no good way to achieve this. The
simplest solution is just to not support dynamic page table
creation/destruction in the aliasing PPGTT.
We could always reload DCLV, but this seems like quite a bit of excess
overhead only to save at most 2MB-4k of memory for the aliasing PPGTT
page tables.
v2: Make the page table bitmap declared inside the function (Chris)
Simplify the way scratching address space works.
Move the alloc/teardown tracepoints up a level in the call stack so that
all implementations get the trace.
v3: Updated trace event to spit out a name
v4: Aliasing ppgtt is now initialized differently (in setup global gtt)
v5: Rebase to latest code. Also removed unnecessary aliasing ppgtt check
for trace, as it is no longer possible after the PPGTT cleanup patch series
of a couple of months ago (Daniel).
v6: Implement changes from code review (Daniel):
- allocate/teardown_va_range calls added.
- Add a scratch page allocation helper (only need the address).
- Move trace events to a new patch.
- Use updated mark_tlbs_dirty.
- Moved pt preallocation for aliasing ppgtt into gen6_ppgtt_init.
v7: teardown_va_range removed (Daniel).
In init, gen6_ppgtt_clear_range call is only needed for aliasing ppgtt.
v8: Rebase after s/page_tables/page_table/.
v9: Remove unnecessary scratch flag in page_table struct, future patches
can just compare against ppgtt->scratch_pt, and alloc_pt_scratch becomes
redundant. Initialize scratch_pt and pt. (Mika)
v10: Clean up aliasing ppgtt init error path and prevent leaking the
ppgtt obj when init fails. (Mika)
Updated commit author. (Daniel)
Cc: Mika Kuoppala <mika.kuoppala@intel.com>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Michel Thierry <michel.thierry@intel.com> (v4+)
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2015-03-24 23:46:22 +08:00
|
|
|
|
|
|
|
unwind_out:
|
|
|
|
for_each_set_bit(pde, new_page_tables, I915_PDES) {
|
2015-04-08 19:13:23 +08:00
|
|
|
struct i915_page_table *pt = ppgtt->pd.page_table[pde];
|
drm/i915: Finish gen6/7 dynamic page table allocation
This patch continues on the idea from "Track GEN6 page table usage".
From here on, in the steady state, PDEs are all pointing to the scratch
page table (as recommended in the spec). When an object is allocated in
the VA range, the code will determine if we need to allocate a page for
the page table. Similarly when the object is destroyed, we will remove,
and free the page table pointing the PDE back to the scratch page.
Following patches will work to unify the code a bit as we bring in GEN8
support. GEN6 and GEN8 are different enough that I had a hard time to
get to this point with as much common code as I do.
The aliasing PPGTT must pre-allocate all of the page tables. There are a
few reasons for this. Two trivial ones: aliasing ppgtt goes through the
ggtt paths, so it's hard to maintain, we currently do not restore the
default context (assuming the previous force reload is indeed
necessary). Most importantly though, the only way (it seems from
empirical evidence) to invalidate the CS TLBs on non-render ring is to
either use ring sync (which requires actually stopping the rings in
order to synchronize when the sync completes vs. where you are in
execution), or to reload DCLV. Since without full PPGTT we do not ever
reload the DCLV register, there is no good way to achieve this. The
simplest solution is just to not support dynamic page table
creation/destruction in the aliasing PPGTT.
We could always reload DCLV, but this seems like quite a bit of excess
overhead only to save at most 2MB-4k of memory for the aliasing PPGTT
page tables.
v2: Make the page table bitmap declared inside the function (Chris)
Simplify the way scratching address space works.
Move the alloc/teardown tracepoints up a level in the call stack so that
all implementations get the trace.
v3: Updated trace event to spit out a name
v4: Aliasing ppgtt is now initialized differently (in setup global gtt)
v5: Rebase to latest code. Also removed unnecessary aliasing ppgtt check
for trace, as it is no longer possible after the PPGTT cleanup patch series
of a couple of months ago (Daniel).
v6: Implement changes from code review (Daniel):
- allocate/teardown_va_range calls added.
- Add a scratch page allocation helper (only need the address).
- Move trace events to a new patch.
- Use updated mark_tlbs_dirty.
- Moved pt preallocation for aliasing ppgtt into gen6_ppgtt_init.
v7: teardown_va_range removed (Daniel).
In init, gen6_ppgtt_clear_range call is only needed for aliasing ppgtt.
v8: Rebase after s/page_tables/page_table/.
v9: Remove unnecessary scratch flag in page_table struct, future patches
can just compare against ppgtt->scratch_pt, and alloc_pt_scratch becomes
redundant. Initialize scratch_pt and pt. (Mika)
v10: Clean up aliasing ppgtt init error path and prevent leaking the
ppgtt obj when init fails. (Mika)
Updated commit author. (Daniel)
Cc: Mika Kuoppala <mika.kuoppala@intel.com>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Michel Thierry <michel.thierry@intel.com> (v4+)
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2015-03-24 23:46:22 +08:00
|
|
|
|
|
|
|
ppgtt->pd.page_table[pde] = ppgtt->scratch_pt;
|
|
|
|
unmap_and_free_pt(pt, vm->dev);
|
|
|
|
}
|
|
|
|
|
|
|
|
mark_tlbs_dirty(ppgtt);
|
|
|
|
return ret;
|
2015-03-17 00:00:56 +08:00
|
|
|
}
|
|
|
|
|
2015-04-14 23:35:13 +08:00
|
|
|
static void gen6_ppgtt_cleanup(struct i915_address_space *vm)
|
2014-02-20 14:05:48 +08:00
|
|
|
{
|
2015-04-14 23:35:13 +08:00
|
|
|
struct i915_hw_ppgtt *ppgtt =
|
|
|
|
container_of(vm, struct i915_hw_ppgtt, base);
|
2015-04-08 19:13:30 +08:00
|
|
|
struct i915_page_table *pt;
|
|
|
|
uint32_t pde;
|
drm/i915: Finish gen6/7 dynamic page table allocation
This patch continues on the idea from "Track GEN6 page table usage".
From here on, in the steady state, PDEs are all pointing to the scratch
page table (as recommended in the spec). When an object is allocated in
the VA range, the code will determine if we need to allocate a page for
the page table. Similarly when the object is destroyed, we will remove,
and free the page table pointing the PDE back to the scratch page.
Following patches will work to unify the code a bit as we bring in GEN8
support. GEN6 and GEN8 are different enough that I had a hard time to
get to this point with as much common code as I do.
The aliasing PPGTT must pre-allocate all of the page tables. There are a
few reasons for this. Two trivial ones: aliasing ppgtt goes through the
ggtt paths, so it's hard to maintain, we currently do not restore the
default context (assuming the previous force reload is indeed
necessary). Most importantly though, the only way (it seems from
empirical evidence) to invalidate the CS TLBs on non-render ring is to
either use ring sync (which requires actually stopping the rings in
order to synchronize when the sync completes vs. where you are in
execution), or to reload DCLV. Since without full PPGTT we do not ever
reload the DCLV register, there is no good way to achieve this. The
simplest solution is just to not support dynamic page table
creation/destruction in the aliasing PPGTT.
We could always reload DCLV, but this seems like quite a bit of excess
overhead only to save at most 2MB-4k of memory for the aliasing PPGTT
page tables.
v2: Make the page table bitmap declared inside the function (Chris)
Simplify the way scratching address space works.
Move the alloc/teardown tracepoints up a level in the call stack so that
all implementations get the trace.
v3: Updated trace event to spit out a name
v4: Aliasing ppgtt is now initialized differently (in setup global gtt)
v5: Rebase to latest code. Also removed unnecessary aliasing ppgtt check
for trace, as it is no longer possible after the PPGTT cleanup patch series
of a couple of months ago (Daniel).
v6: Implement changes from code review (Daniel):
- allocate/teardown_va_range calls added.
- Add a scratch page allocation helper (only need the address).
- Move trace events to a new patch.
- Use updated mark_tlbs_dirty.
- Moved pt preallocation for aliasing ppgtt into gen6_ppgtt_init.
v7: teardown_va_range removed (Daniel).
In init, gen6_ppgtt_clear_range call is only needed for aliasing ppgtt.
v8: Rebase after s/page_tables/page_table/.
v9: Remove unnecessary scratch flag in page_table struct, future patches
can just compare against ppgtt->scratch_pt, and alloc_pt_scratch becomes
redundant. Initialize scratch_pt and pt. (Mika)
v10: Clean up aliasing ppgtt init error path and prevent leaking the
ppgtt obj when init fails. (Mika)
Updated commit author. (Daniel)
Cc: Mika Kuoppala <mika.kuoppala@intel.com>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Michel Thierry <michel.thierry@intel.com> (v4+)
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2015-03-24 23:46:22 +08:00
|
|
|
|
2015-04-14 23:35:13 +08:00
|
|
|
|
|
|
|
drm_mm_remove_node(&ppgtt->node);
|
|
|
|
|
2015-04-08 19:13:30 +08:00
|
|
|
gen6_for_all_pdes(pt, ppgtt, pde) {
|
drm/i915: Finish gen6/7 dynamic page table allocation
This patch continues on the idea from "Track GEN6 page table usage".
From here on, in the steady state, PDEs are all pointing to the scratch
page table (as recommended in the spec). When an object is allocated in
the VA range, the code will determine if we need to allocate a page for
the page table. Similarly when the object is destroyed, we will remove,
and free the page table pointing the PDE back to the scratch page.
Following patches will work to unify the code a bit as we bring in GEN8
support. GEN6 and GEN8 are different enough that I had a hard time to
get to this point with as much common code as I do.
The aliasing PPGTT must pre-allocate all of the page tables. There are a
few reasons for this. Two trivial ones: aliasing ppgtt goes through the
ggtt paths, so it's hard to maintain, we currently do not restore the
default context (assuming the previous force reload is indeed
necessary). Most importantly though, the only way (it seems from
empirical evidence) to invalidate the CS TLBs on non-render ring is to
either use ring sync (which requires actually stopping the rings in
order to synchronize when the sync completes vs. where you are in
execution), or to reload DCLV. Since without full PPGTT we do not ever
reload the DCLV register, there is no good way to achieve this. The
simplest solution is just to not support dynamic page table
creation/destruction in the aliasing PPGTT.
We could always reload DCLV, but this seems like quite a bit of excess
overhead only to save at most 2MB-4k of memory for the aliasing PPGTT
page tables.
v2: Make the page table bitmap declared inside the function (Chris)
Simplify the way scratching address space works.
Move the alloc/teardown tracepoints up a level in the call stack so that
all implementations get the trace.
v3: Updated trace event to spit out a name
v4: Aliasing ppgtt is now initialized differently (in setup global gtt)
v5: Rebase to latest code. Also removed unnecessary aliasing ppgtt check
for trace, as it is no longer possible after the PPGTT cleanup patch series
of a couple of months ago (Daniel).
v6: Implement changes from code review (Daniel):
- allocate/teardown_va_range calls added.
- Add a scratch page allocation helper (only need the address).
- Move trace events to a new patch.
- Use updated mark_tlbs_dirty.
- Moved pt preallocation for aliasing ppgtt into gen6_ppgtt_init.
v7: teardown_va_range removed (Daniel).
In init, gen6_ppgtt_clear_range call is only needed for aliasing ppgtt.
v8: Rebase after s/page_tables/page_table/.
v9: Remove unnecessary scratch flag in page_table struct, future patches
can just compare against ppgtt->scratch_pt, and alloc_pt_scratch becomes
redundant. Initialize scratch_pt and pt. (Mika)
v10: Clean up aliasing ppgtt init error path and prevent leaking the
ppgtt obj when init fails. (Mika)
Updated commit author. (Daniel)
Cc: Mika Kuoppala <mika.kuoppala@intel.com>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Michel Thierry <michel.thierry@intel.com> (v4+)
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2015-03-24 23:46:22 +08:00
|
|
|
if (pt != ppgtt->scratch_pt)
|
2015-04-08 19:13:30 +08:00
|
|
|
unmap_and_free_pt(pt, ppgtt->base.dev);
|
drm/i915: Finish gen6/7 dynamic page table allocation
This patch continues on the idea from "Track GEN6 page table usage".
From here on, in the steady state, PDEs are all pointing to the scratch
page table (as recommended in the spec). When an object is allocated in
the VA range, the code will determine if we need to allocate a page for
the page table. Similarly when the object is destroyed, we will remove,
and free the page table pointing the PDE back to the scratch page.
Following patches will work to unify the code a bit as we bring in GEN8
support. GEN6 and GEN8 are different enough that I had a hard time to
get to this point with as much common code as I do.
The aliasing PPGTT must pre-allocate all of the page tables. There are a
few reasons for this. Two trivial ones: aliasing ppgtt goes through the
ggtt paths, so it's hard to maintain, we currently do not restore the
default context (assuming the previous force reload is indeed
necessary). Most importantly though, the only way (it seems from
empirical evidence) to invalidate the CS TLBs on non-render ring is to
either use ring sync (which requires actually stopping the rings in
order to synchronize when the sync completes vs. where you are in
execution), or to reload DCLV. Since without full PPGTT we do not ever
reload the DCLV register, there is no good way to achieve this. The
simplest solution is just to not support dynamic page table
creation/destruction in the aliasing PPGTT.
We could always reload DCLV, but this seems like quite a bit of excess
overhead only to save at most 2MB-4k of memory for the aliasing PPGTT
page tables.
v2: Make the page table bitmap declared inside the function (Chris)
Simplify the way scratching address space works.
Move the alloc/teardown tracepoints up a level in the call stack so that
all implementations get the trace.
v3: Updated trace event to spit out a name
v4: Aliasing ppgtt is now initialized differently (in setup global gtt)
v5: Rebase to latest code. Also removed unnecessary aliasing ppgtt check
for trace, as it is no longer possible after the PPGTT cleanup patch series
of a couple of months ago (Daniel).
v6: Implement changes from code review (Daniel):
- allocate/teardown_va_range calls added.
- Add a scratch page allocation helper (only need the address).
- Move trace events to a new patch.
- Use updated mark_tlbs_dirty.
- Moved pt preallocation for aliasing ppgtt into gen6_ppgtt_init.
v7: teardown_va_range removed (Daniel).
In init, gen6_ppgtt_clear_range call is only needed for aliasing ppgtt.
v8: Rebase after s/page_tables/page_table/.
v9: Remove unnecessary scratch flag in page_table struct, future patches
can just compare against ppgtt->scratch_pt, and alloc_pt_scratch becomes
redundant. Initialize scratch_pt and pt. (Mika)
v10: Clean up aliasing ppgtt init error path and prevent leaking the
ppgtt obj when init fails. (Mika)
Updated commit author. (Daniel)
Cc: Mika Kuoppala <mika.kuoppala@intel.com>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Michel Thierry <michel.thierry@intel.com> (v4+)
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2015-03-24 23:46:22 +08:00
|
|
|
}
|
drm/i915: Create page table allocators
As we move toward dynamic page table allocation, it becomes much easier
to manage our data structures if we do things less coarsely by
breaking up all of our actions into individual tasks. This makes the
code easier to write, read, and verify.
Aside from the dissection of the allocation functions, the patch
statically allocates the page table structures without a page directory.
This remains the same for all platforms.
The patch itself should not have much functional difference. The primary
noticeable difference is the fact that page tables are no longer
allocated, but rather statically declared as part of the page directory.
This has non-zero overhead, but things gain additional complexity as a
result.
This patch exists for a few reasons:
1. Splitting out the functions allows easily combining GEN6 and GEN8
code. Page tables have no difference based on GEN8. As we'll see in a
future patch when we add the DMA mappings to the allocations, it
requires only one small change to make work, and error handling should
just fall into place.
2. Unless we always want to allocate all page tables under a given PDE,
we'll have to eventually break this up into an array of pointers (or
pointer to pointer).
3. Having the discrete functions is easier to review, and understand.
All allocations and frees now take place in just a couple of locations.
Reviewing, and catching leaks should be easy.
4. Less important: the GFP flags are confined to one location, which
makes playing around with such things trivial.
v2: Updated commit message to explain why this patch exists
v3: For lrc, s/pdp.page_directory[i].daddr/pdp.page_directory[i]->daddr/
v4: Renamed free_pt/pd_single functions to unmap_and_free_pt/pd (Daniel)
v5: Added additional safety checks in gen8 clear/free/unmap.
v6: Use WARN_ON and return -EINVAL in alloc_pt_range (Mika).
v7: Make err_out loop symmetrical to the way we allocate in
alloc_pt_range. Also s/page_tables/page_table and correct commit
message (Mika)
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Michel Thierry <michel.thierry@intel.com> (v3+)
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2015-02-25 00:22:36 +08:00
|
|
|
|
drm/i915: Finish gen6/7 dynamic page table allocation
This patch continues on the idea from "Track GEN6 page table usage".
From here on, in the steady state, PDEs are all pointing to the scratch
page table (as recommended in the spec). When an object is allocated in
the VA range, the code will determine if we need to allocate a page for
the page table. Similarly when the object is destroyed, we will remove,
and free the page table pointing the PDE back to the scratch page.
Following patches will work to unify the code a bit as we bring in GEN8
support. GEN6 and GEN8 are different enough that I had a hard time to
get to this point with as much common code as I do.
The aliasing PPGTT must pre-allocate all of the page tables. There are a
few reasons for this. Two trivial ones: aliasing ppgtt goes through the
ggtt paths, so it's hard to maintain, we currently do not restore the
default context (assuming the previous force reload is indeed
necessary). Most importantly though, the only way (it seems from
empirical evidence) to invalidate the CS TLBs on non-render ring is to
either use ring sync (which requires actually stopping the rings in
order to synchronize when the sync completes vs. where you are in
execution), or to reload DCLV. Since without full PPGTT we do not ever
reload the DCLV register, there is no good way to achieve this. The
simplest solution is just to not support dynamic page table
creation/destruction in the aliasing PPGTT.
We could always reload DCLV, but this seems like quite a bit of excess
overhead only to save at most 2MB-4k of memory for the aliasing PPGTT
page tables.
v2: Make the page table bitmap declared inside the function (Chris)
Simplify the way scratching address space works.
Move the alloc/teardown tracepoints up a level in the call stack so that
all implementations get the trace.
v3: Updated trace event to spit out a name
v4: Aliasing ppgtt is now initialized differently (in setup global gtt)
v5: Rebase to latest code. Also removed unnecessary aliasing ppgtt check
for trace, as it is no longer possible after the PPGTT cleanup patch series
of a couple of months ago (Daniel).
v6: Implement changes from code review (Daniel):
- allocate/teardown_va_range calls added.
- Add a scratch page allocation helper (only need the address).
- Move trace events to a new patch.
- Use updated mark_tlbs_dirty.
- Moved pt preallocation for aliasing ppgtt into gen6_ppgtt_init.
v7: teardown_va_range removed (Daniel).
In init, gen6_ppgtt_clear_range call is only needed for aliasing ppgtt.
v8: Rebase after s/page_tables/page_table/.
v9: Remove unnecessary scratch flag in page_table struct, future patches
can just compare against ppgtt->scratch_pt, and alloc_pt_scratch becomes
redundant. Initialize scratch_pt and pt. (Mika)
v10: Clean up aliasing ppgtt init error path and prevent leaking the
ppgtt obj when init fails. (Mika)
Updated commit author. (Daniel)
Cc: Mika Kuoppala <mika.kuoppala@intel.com>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Michel Thierry <michel.thierry@intel.com> (v4+)
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2015-03-24 23:46:22 +08:00
|
|
|
unmap_and_free_pt(ppgtt->scratch_pt, ppgtt->base.dev);
|
2015-04-08 19:13:32 +08:00
|
|
|
unmap_and_free_pd(&ppgtt->pd, ppgtt->base.dev);
|
2013-01-25 05:49:56 +08:00
|
|
|
}
|
|
|
|
|
2014-02-20 14:05:49 +08:00
|
|
|
static int gen6_ppgtt_allocate_page_directories(struct i915_hw_ppgtt *ppgtt)
|
2013-01-25 05:49:56 +08:00
|
|
|
{
|
2013-07-17 07:50:05 +08:00
|
|
|
struct drm_device *dev = ppgtt->base.dev;
|
2012-02-10 00:15:46 +08:00
|
|
|
struct drm_i915_private *dev_priv = dev->dev_private;
|
2013-12-07 06:11:08 +08:00
|
|
|
bool retried = false;
|
2014-02-20 14:05:49 +08:00
|
|
|
int ret;
|
2012-02-10 00:15:46 +08:00
|
|
|
|
drm/i915: Use drm_mm for PPGTT PDEs
When PPGTT support was originally enabled, it was only designed to
support 1 PPGTT. It therefore made sense to simply hide the GGTT space
required to enable this from the drm_mm allocator.
Since we intend to support full PPGTT, which means more than 1, and they
can be created and destroyed ad hoc it will be required to use the
proper allocation techniques we already have.
The first step here is to make the existing single PPGTT use the
allocator.
The astute observer will notice that we are reserving space in the GGTT
for the PDEs for the lifetime of the address space, and would be right
to question whether or not this is a good idea. It does not make a
difference with this current patch only the aliasing PPGTT (indeed the
PDEs should still be hidden from the shrinker). For the future, we are
allocating from top to bottom to avoid using the precious "gtt
space" The GGTT space at that point should only be used for scanout, HW
contexts, ringbuffers, HWSP, PDEs, and a couple of other small buffers
(potentially) used by the kernel. Everything else should be mapped into
a PPGTT. To put the consumption in more tangible terms, it takes
approximately 4 sets of PDEs to equal one 19x10 framebuffer (with no
fancy stride or alignment constraints). 3/4 of the total [average] GGTT
can be used for PDEs, and hopefully never touch the 1/4 that the
framebuffer needs.
The astute, and persistent observer might ask about the page tables
which are also pinned for the address space. This waste is unfortunate.
We use 2MB of memory per address space. We leave wrapping the PDEs as a
real GEM object as a TODO.
v2: Align PDEs to 64b in GTT
Allocate the node dynamically so we can use drm_mm_put_block
Now tested on IGT
Allocate node at the top to avoid fragmentation (Chris)
v3: Use Chris' top down allocator
v4: Embed drm_mm_node into ppgtt struct (Jesse)
Remove hunks which didn't belong (Jesse)
v5: Don't subtract guard page since we now killed the guard page prior
to this patch. (Ben)
v6: Rebased and removed guard page stuff.
Added a chunk to the commit message
Allow adding a context to mappable region
v7: Undo v3, so we can make the drm patch last in the series
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Jesse Barnes <jbarnes@virtuousgeek.org> (v4)
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
squash: drm/i915: allow PPGTT to use mappable
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2013-12-07 06:11:07 +08:00
|
|
|
/* PPGTT PDEs reside in the GGTT and consists of 512 entries. The
|
|
|
|
* allocator works in address space sizes, so it's multiplied by page
|
|
|
|
* size. We allocate at the top of the GTT to avoid fragmentation.
|
|
|
|
*/
|
|
|
|
BUG_ON(!drm_mm_initialized(&dev_priv->gtt.base.mm));
|
drm/i915: Finish gen6/7 dynamic page table allocation
This patch continues on the idea from "Track GEN6 page table usage".
From here on, in the steady state, PDEs are all pointing to the scratch
page table (as recommended in the spec). When an object is allocated in
the VA range, the code will determine if we need to allocate a page for
the page table. Similarly when the object is destroyed, we will remove,
and free the page table pointing the PDE back to the scratch page.
Following patches will work to unify the code a bit as we bring in GEN8
support. GEN6 and GEN8 are different enough that I had a hard time to
get to this point with as much common code as I do.
The aliasing PPGTT must pre-allocate all of the page tables. There are a
few reasons for this. Two trivial ones: aliasing ppgtt goes through the
ggtt paths, so it's hard to maintain, we currently do not restore the
default context (assuming the previous force reload is indeed
necessary). Most importantly though, the only way (it seems from
empirical evidence) to invalidate the CS TLBs on non-render ring is to
either use ring sync (which requires actually stopping the rings in
order to synchronize when the sync completes vs. where you are in
execution), or to reload DCLV. Since without full PPGTT we do not ever
reload the DCLV register, there is no good way to achieve this. The
simplest solution is just to not support dynamic page table
creation/destruction in the aliasing PPGTT.
We could always reload DCLV, but this seems like quite a bit of excess
overhead only to save at most 2MB-4k of memory for the aliasing PPGTT
page tables.
v2: Make the page table bitmap declared inside the function (Chris)
Simplify the way scratching address space works.
Move the alloc/teardown tracepoints up a level in the call stack so that
both all implementations get the trace.
v3: Updated trace event to spit out a name
v4: Aliasing ppgtt is now initialized differently (in setup global gtt)
v5: Rebase to latest code. Also removed unnecessary aliasing ppgtt check
for trace, as it is no longer possible after the PPGTT cleanup patch series
of a couple of months ago (Daniel).
v6: Implement changes from code review (Daniel):
- allocate/teardown_va_range calls added.
- Add a scratch page allocation helper (only need the address).
- Move trace events to a new patch.
- Use updated mark_tlbs_dirty.
- Moved pt preallocation for aliasing ppgtt into gen6_ppgtt_init.
v7: teardown_va_range removed (Daniel).
In init, gen6_ppgtt_clear_range call is only needed for aliasing ppgtt.
v8: Rebase after s/page_tables/page_table/.
v9: Remove unnecessary scratch flag in page_table struct, future patches
can just compare against ppgtt->scratch_pt, and alloc_pt_scratch becomes
redundant. Initialize scratch_pt and pt. (Mika)
v10: Clean up aliasing ppgtt init error path and prevent leaking the
ppgtt obj when init fails. (Mika)
Updated commit author. (Daniel)
Cc: Mika Kuoppala <mika.kuoppala@intel.com>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Michel Thierry <michel.thierry@intel.com> (v4+)
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2015-03-24 23:46:22 +08:00
|
|
|
ppgtt->scratch_pt = alloc_pt_single(ppgtt->base.dev);
|
|
|
|
if (IS_ERR(ppgtt->scratch_pt))
|
|
|
|
return PTR_ERR(ppgtt->scratch_pt);
|
|
|
|
|
|
|
|
gen6_initialize_pt(&ppgtt->base, ppgtt->scratch_pt);
|
|
|
|
|
2013-12-07 06:11:08 +08:00
|
|
|
alloc:
|
drm/i915: Use drm_mm for PPGTT PDEs
When PPGTT support was originally enabled, it was only designed to
support 1 PPGTT. It therefore made sense to simply hide the GGTT space
required to enable this from the drm_mm allocator.
Since we intend to support full PPGTT, which means more than 1, and they
can be created and destroyed ad hoc it will be required to use the
proper allocation techniques we already have.
The first step here is to make the existing single PPGTT use the
allocator.
The astute observer will notice that we are reserving space in the GGTT
for the PDEs for the lifetime of the address space, and would be right
to question whether or not this is a good idea. It does not make a
difference with this current patch only the aliasing PPGTT (indeed the
PDEs should still be hidden from the shrinker). For the future, we are
allocating from top to bottom to avoid using the precious "gtt
space" The GGTT space at that point should only be used for scanout, HW
contexts, ringbuffers, HWSP, PDEs, and a couple of other small buffers
(potentially) used by the kernel. Everything else should be mapped into
a PPGTT. To put the consumption in more tangible terms, it takes
approximately 4 sets of PDEs to equal one 19x10 framebuffer (with no
fancy stride or alignment constraints). 3/4 of the total [average] GGTT
can be used for PDEs, and hopefully never touch the 1/4 that the
framebuffer needs.
The astute, and persistent observer might ask about the page tables
which are also pinned for the address space. This waste is unfortunate.
We use 2MB of memory per address space. We leave wrapping the PDEs as a
real GEM object as a TODO.
v2: Align PDEs to 64b in GTT
Allocate the node dynamically so we can use drm_mm_put_block
Now tested on IGT
Allocate node at the top to avoid fragmentation (Chris)
v3: Use Chris' top down allocator
v4: Embed drm_mm_node into ppgtt struct (Jesse)
Remove hunks which didn't belong (Jesse)
v5: Don't subtract guard page since we now killed the guard page prior
to this patch. (Ben)
v6: Rebased and removed guard page stuff.
Added a chunk to the commit message
Allow adding a context to mappable region
v7: Undo v3, so we can make the drm patch last in the series
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Jesse Barnes <jbarnes@virtuousgeek.org> (v4)
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
squash: drm/i915: allow PPGTT to use mappable
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2013-12-07 06:11:07 +08:00
|
|
|
ret = drm_mm_insert_node_in_range_generic(&dev_priv->gtt.base.mm,
|
|
|
|
&ppgtt->node, GEN6_PD_SIZE,
|
|
|
|
GEN6_PD_ALIGN, 0,
|
|
|
|
0, dev_priv->gtt.base.total,
|
2014-05-07 13:21:30 +08:00
|
|
|
DRM_MM_TOPDOWN);
|
2013-12-07 06:11:08 +08:00
|
|
|
if (ret == -ENOSPC && !retried) {
|
|
|
|
ret = i915_gem_evict_something(dev, &dev_priv->gtt.base,
|
|
|
|
GEN6_PD_SIZE, GEN6_PD_ALIGN,
|
drm/i915: Prevent negative relocation deltas from wrapping
This is pure evil. Userspace, I'm looking at you SNA, repacks batch
buffers on the fly after generation as they are being passed to the
kernel for execution. These batches also contain self-referenced
relocations as a single buffer encompasses the state commands, kernels,
vertices and sampler. During generation the buffers are placed at known
offsets within the full batch, and then the relocation deltas (as passed
to the kernel) are tweaked as the batch is repacked into a smaller buffer.
This means that userspace is passing negative relocations deltas, which
subsequently wrap to large values if the batch is at a low address. The
GPU hangs when it then tries to use the large value as a base for its
address offsets, rather than wrapping back to the real value (as one
would hope). As the GPU uses positive offsets from the base, we can
treat the relocation address as the minimum address read by the GPU.
For the upper bound, we trust that userspace will not read beyond the
end of the buffer.
So, how do we fix negative relocations from wrapping? We can either
check that every relocation looks valid when we write it, and then
position each object such that we prevent the offset wraparound, or we
just special-case the self-referential behaviour of SNA and force all
batches to be above 256k. Daniel prefers the latter approach.
This fixes a GPU hang when it tries to use an address (relocation +
offset) greater than the GTT size. The issue would occur quite easily
with full-ppgtt as each fd gets its own VM space, so low offsets would
often be handed out. However, with the rearrangement of the low GTT due
to capturing the BIOS framebuffer, it is already affecting kernels 3.15
onwards. I think only IVB+ is susceptible to this bug, but the workaround
should only kick in rarely, so it seems sensible to always apply it.
v3: Use a bias for batch buffers to prevent small negative delta relocations
from wrapping.
v4 from Daniel:
- s/BIAS/BATCH_OFFSET_BIAS/
- Extract eb_vma_misplaced/i915_vma_misplaced since the conditions
were growing rather cumbersome.
- Add a comment to eb_get_batch explaining why we do this.
- Apply the batch offset bias everywhere but mention that we've only
observed it on gen7 gpus.
- Drop PIN_OFFSET_FIX for now, that slipped in from a feature patch.
v5: Add static to eb_get_batch, spotted by 0-day tester.
Testcase: igt/gem_bad_reloc
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=78533
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> (v3)
Cc: stable@vger.kernel.org
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2014-05-23 14:48:08 +08:00
|
|
|
I915_CACHE_NONE,
|
|
|
|
0, dev_priv->gtt.base.total,
|
|
|
|
0);
|
2013-12-07 06:11:08 +08:00
|
|
|
if (ret)
|
2015-03-17 00:00:56 +08:00
|
|
|
goto err_out;
|
2013-12-07 06:11:08 +08:00
|
|
|
|
|
|
|
retried = true;
|
|
|
|
goto alloc;
|
|
|
|
}
|
drm/i915: Use drm_mm for PPGTT PDEs
When PPGTT support was originally enabled, it was only designed to
support 1 PPGTT. It therefore made sense to simply hide the GGTT space
required to enable this from the drm_mm allocator.
Since we intend to support full PPGTT, which means more than 1, and they
can be created and destroyed ad hoc it will be required to use the
proper allocation techniques we already have.
The first step here is to make the existing single PPGTT use the
allocator.
The astute observer will notice that we are reserving space in the GGTT
for the PDEs for the lifetime of the address space, and would be right
to question whether or not this is a good idea. It does not make a
difference with this current patch only the aliasing PPGTT (indeed the
PDEs should still be hidden from the shrinker). For the future, we are
allocating from top to bottom to avoid using the precious "gtt
space" The GGTT space at that point should only be used for scanout, HW
contexts, ringbuffers, HWSP, PDEs, and a couple of other small buffers
(potentially) used by the kernel. Everything else should be mapped into
a PPGTT. To put the consumption in more tangible terms, it takes
approximately 4 sets of PDEs to equal one 19x10 framebuffer (with no
fancy stride or alignment constraints). 3/4 of the total [average] GGTT
can be used for PDEs, and hopefully never touch the 1/4 that the
framebuffer needs.
The astute, and persistent observer might ask about the page tables
which are also pinned for the address space. This waste is unfortunate.
We use 2MB of memory per address space. We leave wrapping the PDEs as a
real GEM object as a TODO.
v2: Align PDEs to 64b in GTT
Allocate the node dynamically so we can use drm_mm_put_block
Now tested on IGT
Allocate node at the top to avoid fragmentation (Chris)
v3: Use Chris' top down allocator
v4: Embed drm_mm_node into ppgtt struct (Jesse)
Remove hunks which didn't belong (Jesse)
v5: Don't subtract guard page since we now killed the guard page prior
to this patch. (Ben)
v6: Rebased and removed guard page stuff.
Added a chunk to the commit message
Allow adding a context to mappable region
v7: Undo v3, so we can make the drm patch last in the series
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Jesse Barnes <jbarnes@virtuousgeek.org> (v4)
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
squash: drm/i915: allow PPGTT to use mappable
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2013-12-07 06:11:07 +08:00
|
|
|
|
2015-01-23 01:01:25 +08:00
|
|
|
if (ret)
|
2015-03-17 00:00:56 +08:00
|
|
|
goto err_out;
|
|
|
|
|
2015-01-23 01:01:25 +08:00
|
|
|
|
drm/i915: Use drm_mm for PPGTT PDEs
When PPGTT support was originally enabled, it was only designed to
support 1 PPGTT. It therefore made sense to simply hide the GGTT space
required to enable this from the drm_mm allocator.
Since we intend to support full PPGTT, which means more than 1, and they
can be created and destroyed ad hoc it will be required to use the
proper allocation techniques we already have.
The first step here is to make the existing single PPGTT use the
allocator.
The astute observer will notice that we are reserving space in the GGTT
for the PDEs for the lifetime of the address space, and would be right
to question whether or not this is a good idea. It does not make a
difference with this current patch only the aliasing PPGTT (indeed the
PDEs should still be hidden from the shrinker). For the future, we are
allocating from top to bottom to avoid using the precious "gtt
space" The GGTT space at that point should only be used for scanout, HW
contexts, ringbuffers, HWSP, PDEs, and a couple of other small buffers
(potentially) used by the kernel. Everything else should be mapped into
a PPGTT. To put the consumption in more tangible terms, it takes
approximately 4 sets of PDEs to equal one 19x10 framebuffer (with no
fancy stride or alignment constraints). 3/4 of the total [average] GGTT
can be used for PDEs, and hopefully never touch the 1/4 that the
framebuffer needs.
The astute, and persistent observer might ask about the page tables
which are also pinned for the address space. This waste is unfortunate.
We use 2MB of memory per address space. We leave wrapping the PDEs as a
real GEM object as a TODO.
v2: Align PDEs to 64b in GTT
Allocate the node dynamically so we can use drm_mm_put_block
Now tested on IGT
Allocate node at the top to avoid fragmentation (Chris)
v3: Use Chris' top down allocator
v4: Embed drm_mm_node into ppgtt struct (Jesse)
Remove hunks which didn't belong (Jesse)
v5: Don't subtract guard page since we now killed the guard page prior
to this patch. (Ben)
v6: Rebased and removed guard page stuff.
Added a chunk to the commit message
Allow adding a context to mappable region
v7: Undo v3, so we can make the drm patch last in the series
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Jesse Barnes <jbarnes@virtuousgeek.org> (v4)
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
squash: drm/i915: allow PPGTT to use mappable
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2013-12-07 06:11:07 +08:00
|
|
|
if (ppgtt->node.start < dev_priv->gtt.mappable_end)
|
|
|
|
DRM_DEBUG("Forced to use aperture for PDEs\n");
|
2012-02-10 00:15:46 +08:00
|
|
|
|
2015-01-23 01:01:25 +08:00
|
|
|
return 0;
|
2015-03-17 00:00:56 +08:00
|
|
|
|
|
|
|
err_out:
|
drm/i915: Finish gen6/7 dynamic page table allocation
This patch continues on the idea from "Track GEN6 page table usage".
From here on, in the steady state, PDEs are all pointing to the scratch
page table (as recommended in the spec). When an object is allocated in
the VA range, the code will determine if we need to allocate a page for
the page table. Similarly when the object is destroyed, we will remove,
and free the page table pointing the PDE back to the scratch page.
Following patches will work to unify the code a bit as we bring in GEN8
support. GEN6 and GEN8 are different enough that I had a hard time to
get to this point with as much common code as I do.
The aliasing PPGTT must pre-allocate all of the page tables. There are a
few reasons for this. Two trivial ones: aliasing ppgtt goes through the
ggtt paths, so it's hard to maintain, we currently do not restore the
default context (assuming the previous force reload is indeed
necessary). Most importantly though, the only way (it seems from
empirical evidence) to invalidate the CS TLBs on non-render ring is to
either use ring sync (which requires actually stopping the rings in
order to synchronize when the sync completes vs. where you are in
execution), or to reload DCLV. Since without full PPGTT we do not ever
reload the DCLV register, there is no good way to achieve this. The
simplest solution is just to not support dynamic page table
creation/destruction in the aliasing PPGTT.
We could always reload DCLV, but this seems like quite a bit of excess
overhead only to save at most 2MB-4k of memory for the aliasing PPGTT
page tables.
v2: Make the page table bitmap declared inside the function (Chris)
Simplify the way scratching address space works.
Move the alloc/teardown tracepoints up a level in the call stack so that
both all implementations get the trace.
v3: Updated trace event to spit out a name
v4: Aliasing ppgtt is now initialized differently (in setup global gtt)
v5: Rebase to latest code. Also removed unnecessary aliasing ppgtt check
for trace, as it is no longer possible after the PPGTT cleanup patch series
of a couple of months ago (Daniel).
v6: Implement changes from code review (Daniel):
- allocate/teardown_va_range calls added.
- Add a scratch page allocation helper (only need the address).
- Move trace events to a new patch.
- Use updated mark_tlbs_dirty.
- Moved pt preallocation for aliasing ppgtt into gen6_ppgtt_init.
v7: teardown_va_range removed (Daniel).
In init, gen6_ppgtt_clear_range call is only needed for aliasing ppgtt.
v8: Rebase after s/page_tables/page_table/.
v9: Remove unnecessary scratch flag in page_table struct, future patches
can just compare against ppgtt->scratch_pt, and alloc_pt_scratch becomes
redundant. Initialize scratch_pt and pt. (Mika)
v10: Clean up aliasing ppgtt init error path and prevent leaking the
ppgtt obj when init fails. (Mika)
Updated commit author. (Daniel)
Cc: Mika Kuoppala <mika.kuoppala@intel.com>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Michel Thierry <michel.thierry@intel.com> (v4+)
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2015-03-24 23:46:22 +08:00
|
|
|
unmap_and_free_pt(ppgtt->scratch_pt, ppgtt->base.dev);
|
2015-03-17 00:00:56 +08:00
|
|
|
return ret;
|
2014-02-20 14:05:49 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Allocation entry point for a gen6/7 PPGTT.
 *
 * Forwards to gen6_ppgtt_allocate_page_directories() and hands its result
 * back to the caller unchanged (0 on success, non-zero error otherwise).
 */
static int gen6_ppgtt_alloc(struct i915_hw_ppgtt *ppgtt)
{
	int err = gen6_ppgtt_allocate_page_directories(ppgtt);

	return err;
}
|
2015-02-25 00:22:37 +08:00
|
|
|
|
drm/i915: Finish gen6/7 dynamic page table allocation
This patch continues on the idea from "Track GEN6 page table usage".
From here on, in the steady state, PDEs are all pointing to the scratch
page table (as recommended in the spec). When an object is allocated in
the VA range, the code will determine if we need to allocate a page for
the page table. Similarly when the object is destroyed, we will remove,
and free the page table pointing the PDE back to the scratch page.
Following patches will work to unify the code a bit as we bring in GEN8
support. GEN6 and GEN8 are different enough that I had a hard time to
get to this point with as much common code as I do.
The aliasing PPGTT must pre-allocate all of the page tables. There are a
few reasons for this. Two trivial ones: aliasing ppgtt goes through the
ggtt paths, so it's hard to maintain, we currently do not restore the
default context (assuming the previous force reload is indeed
necessary). Most importantly though, the only way (it seems from
empirical evidence) to invalidate the CS TLBs on non-render ring is to
either use ring sync (which requires actually stopping the rings in
order to synchronize when the sync completes vs. where you are in
execution), or to reload DCLV. Since without full PPGTT we do not ever
reload the DCLV register, there is no good way to achieve this. The
simplest solution is just to not support dynamic page table
creation/destruction in the aliasing PPGTT.
We could always reload DCLV, but this seems like quite a bit of excess
overhead only to save at most 2MB-4k of memory for the aliasing PPGTT
page tables.
v2: Make the page table bitmap declared inside the function (Chris)
Simplify the way scratching address space works.
Move the alloc/teardown tracepoints up a level in the call stack so that
both all implementations get the trace.
v3: Updated trace event to spit out a name
v4: Aliasing ppgtt is now initialized differently (in setup global gtt)
v5: Rebase to latest code. Also removed unnecessary aliasing ppgtt check
for trace, as it is no longer possible after the PPGTT cleanup patch series
of a couple of months ago (Daniel).
v6: Implement changes from code review (Daniel):
- allocate/teardown_va_range calls added.
- Add a scratch page allocation helper (only need the address).
- Move trace events to a new patch.
- Use updated mark_tlbs_dirty.
- Moved pt preallocation for aliasing ppgtt into gen6_ppgtt_init.
v7: teardown_va_range removed (Daniel).
In init, gen6_ppgtt_clear_range call is only needed for aliasing ppgtt.
v8: Rebase after s/page_tables/page_table/.
v9: Remove unnecessary scratch flag in page_table struct, future patches
can just compare against ppgtt->scratch_pt, and alloc_pt_scratch becomes
redundant. Initialize scratch_pt and pt. (Mika)
v10: Clean up aliasing ppgtt init error path and prevent leaking the
ppgtt obj when init fails. (Mika)
Updated commit author. (Daniel)
Cc: Mika Kuoppala <mika.kuoppala@intel.com>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Michel Thierry <michel.thierry@intel.com> (v4+)
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2015-03-24 23:46:22 +08:00
|
|
|
/*
 * Point every page directory slot visited by gen6_for_each_pde() for the
 * range [start, start + length) at the PPGTT's scratch page table.
 *
 * Only the in-memory page_table[] array of the page directory is updated
 * here.
 */
static void gen6_scratch_va_range(struct i915_hw_ppgtt *ppgtt,
				  uint64_t start, uint64_t length)
{
	struct i915_page_table *scratch = ppgtt->scratch_pt;
	struct i915_page_table *pt;	/* cursor required by the iterator, not read */
	uint32_t idx, step;

	gen6_for_each_pde(pt, &ppgtt->pd, start, length, step, idx) {
		ppgtt->pd.page_table[idx] = scratch;
	}
}
|
|
|
|
|
2015-04-14 23:35:14 +08:00
|
|
|
static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
|
2014-02-20 14:05:49 +08:00
|
|
|
{
|
|
|
|
struct drm_device *dev = ppgtt->base.dev;
|
|
|
|
struct drm_i915_private *dev_priv = dev->dev_private;
|
|
|
|
int ret;
|
|
|
|
|
|
|
|
ppgtt->base.pte_encode = dev_priv->gtt.base.pte_encode;
|
|
|
|
if (IS_GEN6(dev)) {
|
|
|
|
ppgtt->switch_mm = gen6_mm_switch;
|
|
|
|
} else if (IS_HASWELL(dev)) {
|
|
|
|
ppgtt->switch_mm = hsw_mm_switch;
|
|
|
|
} else if (IS_GEN7(dev)) {
|
|
|
|
ppgtt->switch_mm = gen7_mm_switch;
|
|
|
|
} else
|
|
|
|
BUG();
|
|
|
|
|
2015-02-10 19:05:54 +08:00
|
|
|
if (intel_vgpu_active(dev))
|
|
|
|
ppgtt->switch_mm = vgpu_mm_switch;
|
|
|
|
|
2014-02-20 14:05:49 +08:00
|
|
|
ret = gen6_ppgtt_alloc(ppgtt);
|
|
|
|
if (ret)
|
|
|
|
return ret;
|
|
|
|
|
2015-04-14 23:35:14 +08:00
|
|
|
ppgtt->base.allocate_va_range = gen6_alloc_va_range;
|
2014-02-20 14:05:49 +08:00
|
|
|
ppgtt->base.clear_range = gen6_ppgtt_clear_range;
|
|
|
|
ppgtt->base.insert_entries = gen6_ppgtt_insert_entries;
|
2015-04-14 23:35:12 +08:00
|
|
|
ppgtt->base.unbind_vma = ppgtt_unbind_vma;
|
|
|
|
ppgtt->base.bind_vma = ppgtt_bind_vma;
|
2014-02-20 14:05:49 +08:00
|
|
|
ppgtt->base.cleanup = gen6_ppgtt_cleanup;
|
|
|
|
ppgtt->base.start = 0;
|
2015-04-08 19:13:30 +08:00
|
|
|
ppgtt->base.total = I915_PDES * GEN6_PTES * PAGE_SIZE;
|
2013-12-07 06:11:29 +08:00
|
|
|
ppgtt->debug_dump = gen6_dump_ppgtt;
|
2012-02-10 00:15:46 +08:00
|
|
|
|
2015-02-25 00:22:35 +08:00
|
|
|
ppgtt->pd.pd_offset =
|
2015-03-17 00:00:54 +08:00
|
|
|
ppgtt->node.start / PAGE_SIZE * sizeof(gen6_pte_t);
|
2012-02-10 00:15:46 +08:00
|
|
|
|
2015-03-17 00:00:56 +08:00
|
|
|
ppgtt->pd_addr = (gen6_pte_t __iomem *)dev_priv->gtt.gsm +
|
|
|
|
ppgtt->pd.pd_offset / sizeof(gen6_pte_t);
|
|
|
|
|
2015-04-14 23:35:14 +08:00
|
|
|
gen6_scratch_va_range(ppgtt, 0, ppgtt->base.total);
|
2012-02-10 00:15:46 +08:00
|
|
|
|
2015-03-17 00:00:56 +08:00
|
|
|
gen6_write_page_range(dev_priv, &ppgtt->pd, 0, ppgtt->base.total);
|
|
|
|
|
2015-01-23 16:05:06 +08:00
|
|
|
DRM_DEBUG_DRIVER("Allocated pde space (%lldM) at GTT entry: %llx\n",
|
2014-02-20 14:05:49 +08:00
|
|
|
ppgtt->node.size >> 20,
|
|
|
|
ppgtt->node.start / PAGE_SIZE);
|
2013-01-25 05:49:56 +08:00
|
|
|
|
2014-08-07 02:19:54 +08:00
|
|
|
DRM_DEBUG("Adding PPGTT at offset %x\n",
|
2015-02-25 00:22:35 +08:00
|
|
|
ppgtt->pd.pd_offset << 10);
|
2014-08-07 02:19:54 +08:00
|
|
|
|
2014-02-20 14:05:49 +08:00
|
|
|
return 0;
|
2013-01-25 05:49:56 +08:00
|
|
|
}
|
|
|
|
|
2015-04-14 23:35:14 +08:00
|
|
|
static int __hw_ppgtt_init(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt)
|
2013-01-25 05:49:56 +08:00
|
|
|
{
|
|
|
|
struct drm_i915_private *dev_priv = dev->dev_private;
|
|
|
|
|
2013-07-17 07:50:05 +08:00
|
|
|
ppgtt->base.dev = dev;
|
2014-03-09 03:58:16 +08:00
|
|
|
ppgtt->base.scratch = dev_priv->gtt.base.scratch;
|
2013-01-25 05:49:56 +08:00
|
|
|
|
2013-04-09 09:43:53 +08:00
|
|
|
if (INTEL_INFO(dev)->gen < 8)
|
2015-04-14 23:35:14 +08:00
|
|
|
return gen6_ppgtt_init(ppgtt);
|
2013-04-09 09:43:53 +08:00
|
|
|
else
|
drm/i915/gen8: Dynamic page table allocations
This finishes off the dynamic page tables allocations, in the legacy 3
level style that already exists. Most everything has already been setup
to this point, the patch finishes off the enabling by setting the
appropriate function pointers.
In LRC mode, contexts need to know the PDPs when they are populated. With
dynamic page table allocations, these PDPs may not exist yet. Check if
PDPs have been allocated and use the scratch page if they do not exist yet.
Before submission, update the PDPs in the logic ring context as PDPs
have been allocated.
v2: Update aliasing/true ppgtt allocate/teardown/clear functions for
gen 6 & 7.
v3: Rebase.
v4: Remove BUG() from ppgtt_unbind_vma, but keep checking that either
teardown_va_range or clear_range functions exist (Daniel).
v5: Similar to gen6, in init, gen8_ppgtt_clear_range call is only needed
for aliasing ppgtt. Zombie tracking was originally added for teardown
function and is no longer required.
v6: Update err_out case in gen8_alloc_va_range (missed from lastest
rebase).
v7: Rebase after s/page_tables/page_table/.
v8: Updated scratch_pt check after scratch flag was removed in previous
patch.
v9: Note that lrc mode needs to be updated to support init state without
any PDP.
v10: Unmap correct page_table in gen8_alloc_va_range's error case, clean-up
gen8_aliasing_ppgtt_init (remove duplicated map), and initialize PTs
during page table allocation.
v11: Squashed LRC enabling commit, otherwise LRC mode would be left broken
until it was updated to handle the init case without any PDP.
v12: Do not overallocate new_pts bitmap, make alloc_gen8_temp_bitmaps
static and don't abuse of inline functions. (Mika)
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Michel Thierry <michel.thierry@intel.com> (v2+)
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2015-04-08 19:13:34 +08:00
|
|
|
return gen8_ppgtt_init(ppgtt);
|
2014-08-07 02:19:54 +08:00
|
|
|
}
|
|
|
|
int i915_ppgtt_init(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt)
|
|
|
|
{
|
|
|
|
struct drm_i915_private *dev_priv = dev->dev_private;
|
|
|
|
int ret = 0;
|
2013-04-09 09:43:53 +08:00
|
|
|
|
2015-04-14 23:35:14 +08:00
|
|
|
ret = __hw_ppgtt_init(dev, ppgtt);
|
2014-08-07 02:19:54 +08:00
|
|
|
if (ret == 0) {
|
drm/i915: Add VM to context
Pretty straightforward so far except for the bit about the refcounting.
The PPGTT will potentially be shared amongst multiple contexts. Because
contexts themselves have a refcounted lifecycle, the easiest way to
manage this will be to refcount the PPGTT. To acheive this, we piggy
back off of the existing context refcount, and will increment and
decrement the PPGTT refcount with context creation, and destruction.
To put it more clearly, if context A, and context B both use PPGTT 0, we
can't free the PPGTT until both A, and B are destroyed.
Note that because the PPGTT is permanently pinned (for now), it really
just matters for the PPGTT destruction, as opposed to making space under
memory pressure.
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2013-12-07 06:11:15 +08:00
|
|
|
kref_init(&ppgtt->ref);
|
2013-07-17 07:50:06 +08:00
|
|
|
drm_mm_init(&ppgtt->base.mm, ppgtt->base.start,
|
|
|
|
ppgtt->base.total);
|
2013-12-07 06:11:26 +08:00
|
|
|
i915_init_vm(dev_priv, &ppgtt->base);
|
2013-07-17 07:50:06 +08:00
|
|
|
}
|
2012-02-10 00:15:46 +08:00
|
|
|
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2014-08-07 02:19:53 +08:00
|
|
|
int i915_ppgtt_init_hw(struct drm_device *dev)
|
|
|
|
{
|
|
|
|
struct drm_i915_private *dev_priv = dev->dev_private;
|
|
|
|
struct intel_engine_cs *ring;
|
|
|
|
struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt;
|
|
|
|
int i, ret = 0;
|
|
|
|
|
2014-08-20 23:24:50 +08:00
|
|
|
/* In the case of execlists, PPGTT is enabled by the context descriptor
|
|
|
|
* and the PDPs are contained within the context itself. We don't
|
|
|
|
* need to do anything here. */
|
|
|
|
if (i915.enable_execlists)
|
|
|
|
return 0;
|
|
|
|
|
2014-08-07 02:19:53 +08:00
|
|
|
if (!USES_PPGTT(dev))
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
if (IS_GEN6(dev))
|
|
|
|
gen6_ppgtt_enable(dev);
|
|
|
|
else if (IS_GEN7(dev))
|
|
|
|
gen7_ppgtt_enable(dev);
|
|
|
|
else if (INTEL_INFO(dev)->gen >= 8)
|
|
|
|
gen8_ppgtt_enable(dev);
|
|
|
|
else
|
2014-12-08 23:40:10 +08:00
|
|
|
MISSING_CASE(INTEL_INFO(dev)->gen);
|
2014-08-07 02:19:53 +08:00
|
|
|
|
|
|
|
if (ppgtt) {
|
|
|
|
for_each_ring(ring, dev_priv, i) {
|
2014-08-16 01:51:35 +08:00
|
|
|
ret = ppgtt->switch_mm(ppgtt, ring);
|
2014-08-07 02:19:53 +08:00
|
|
|
if (ret != 0)
|
|
|
|
return ret;
|
2013-12-07 06:11:26 +08:00
|
|
|
}
|
2013-07-17 07:50:06 +08:00
|
|
|
}
|
2012-02-10 00:15:46 +08:00
|
|
|
|
|
|
|
return ret;
|
|
|
|
}
|
2014-08-06 21:04:47 +08:00
|
|
|
struct i915_hw_ppgtt *
|
|
|
|
i915_ppgtt_create(struct drm_device *dev, struct drm_i915_file_private *fpriv)
|
|
|
|
{
|
|
|
|
struct i915_hw_ppgtt *ppgtt;
|
|
|
|
int ret;
|
|
|
|
|
|
|
|
ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL);
|
|
|
|
if (!ppgtt)
|
|
|
|
return ERR_PTR(-ENOMEM);
|
|
|
|
|
|
|
|
ret = i915_ppgtt_init(dev, ppgtt);
|
|
|
|
if (ret) {
|
|
|
|
kfree(ppgtt);
|
|
|
|
return ERR_PTR(ret);
|
|
|
|
}
|
|
|
|
|
|
|
|
ppgtt->file_priv = fpriv;
|
|
|
|
|
2014-11-10 21:44:31 +08:00
|
|
|
trace_i915_ppgtt_create(&ppgtt->base);
|
|
|
|
|
2014-08-06 21:04:47 +08:00
|
|
|
return ppgtt;
|
|
|
|
}
|
|
|
|
|
2014-08-06 21:04:45 +08:00
|
|
|
void i915_ppgtt_release(struct kref *kref)
|
|
|
|
{
|
|
|
|
struct i915_hw_ppgtt *ppgtt =
|
|
|
|
container_of(kref, struct i915_hw_ppgtt, ref);
|
|
|
|
|
2014-11-10 21:44:31 +08:00
|
|
|
trace_i915_ppgtt_release(&ppgtt->base);
|
|
|
|
|
2014-08-06 21:04:45 +08:00
|
|
|
/* vmas should already be unbound */
|
|
|
|
WARN_ON(!list_empty(&ppgtt->base.active_list));
|
|
|
|
WARN_ON(!list_empty(&ppgtt->base.inactive_list));
|
|
|
|
|
2014-08-06 21:04:55 +08:00
|
|
|
list_del(&ppgtt->base.global_link);
|
|
|
|
drm_mm_takedown(&ppgtt->base.mm);
|
|
|
|
|
2014-08-06 21:04:45 +08:00
|
|
|
ppgtt->base.cleanup(&ppgtt->base);
|
|
|
|
kfree(ppgtt);
|
|
|
|
}
|
2012-02-10 00:15:46 +08:00
|
|
|
|
2013-01-19 04:30:31 +08:00
|
|
|
extern int intel_iommu_gfx_mapped;
|
|
|
|
/* Certain Gen5 chipsets require idling the GPU before
|
|
|
|
* unmapping anything from the GTT when VT-d is enabled.
|
|
|
|
*/
|
2015-04-14 23:35:26 +08:00
|
|
|
static bool needs_idle_maps(struct drm_device *dev)
|
2013-01-19 04:30:31 +08:00
|
|
|
{
|
|
|
|
#ifdef CONFIG_INTEL_IOMMU
|
|
|
|
/* Query intel_iommu to see if we need the workaround. Presumably that
|
|
|
|
* was loaded first.
|
|
|
|
*/
|
|
|
|
if (IS_GEN5(dev) && IS_MOBILE(dev) && intel_iommu_gfx_mapped)
|
|
|
|
return true;
|
|
|
|
#endif
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2011-10-18 06:51:55 +08:00
|
|
|
static bool do_idling(struct drm_i915_private *dev_priv)
|
|
|
|
{
|
|
|
|
bool ret = dev_priv->mm.interruptible;
|
|
|
|
|
2013-01-19 04:30:31 +08:00
|
|
|
if (unlikely(dev_priv->gtt.do_idle_maps)) {
|
2011-10-18 06:51:55 +08:00
|
|
|
dev_priv->mm.interruptible = false;
|
2012-04-27 07:02:58 +08:00
|
|
|
if (i915_gpu_idle(dev_priv->dev)) {
|
2011-10-18 06:51:55 +08:00
|
|
|
DRM_ERROR("Couldn't idle GPU\n");
|
|
|
|
/* Wait a bit, in hopes it avoids the hang */
|
|
|
|
udelay(10);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Counterpart of do_idling(): when the do_idle_maps workaround is enabled,
 * set mm.interruptible to @interruptible. Otherwise do nothing.
 */
static void undo_idling(struct drm_i915_private *dev_priv, bool interruptible)
{
	if (unlikely(dev_priv->gtt.do_idle_maps)) {
		dev_priv->mm.interruptible = interruptible;
	}
}
|
|
|
|
|
2013-10-17 00:21:30 +08:00
|
|
|
void i915_check_and_clear_faults(struct drm_device *dev)
|
|
|
|
{
|
|
|
|
struct drm_i915_private *dev_priv = dev->dev_private;
|
2014-05-22 21:13:33 +08:00
|
|
|
struct intel_engine_cs *ring;
|
2013-10-17 00:21:30 +08:00
|
|
|
int i;
|
|
|
|
|
|
|
|
if (INTEL_INFO(dev)->gen < 6)
|
|
|
|
return;
|
|
|
|
|
|
|
|
for_each_ring(ring, dev_priv, i) {
|
|
|
|
u32 fault_reg;
|
|
|
|
fault_reg = I915_READ(RING_FAULT_REG(ring));
|
|
|
|
if (fault_reg & RING_FAULT_VALID) {
|
|
|
|
DRM_DEBUG_DRIVER("Unexpected fault\n"
|
2014-10-31 01:52:45 +08:00
|
|
|
"\tAddr: 0x%08lx\n"
|
2013-10-17 00:21:30 +08:00
|
|
|
"\tAddress space: %s\n"
|
|
|
|
"\tSource ID: %d\n"
|
|
|
|
"\tType: %d\n",
|
|
|
|
fault_reg & PAGE_MASK,
|
|
|
|
fault_reg & RING_FAULT_GTTSEL_MASK ? "GGTT" : "PPGTT",
|
|
|
|
RING_FAULT_SRCID(fault_reg),
|
|
|
|
RING_FAULT_FAULT_TYPE(fault_reg));
|
|
|
|
I915_WRITE(RING_FAULT_REG(ring),
|
|
|
|
fault_reg & ~RING_FAULT_VALID);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
POSTING_READ(RING_FAULT_REG(&dev_priv->ring[RCS]));
|
|
|
|
}
|
|
|
|
|
2014-09-25 17:13:12 +08:00
|
|
|
static void i915_ggtt_flush(struct drm_i915_private *dev_priv)
|
|
|
|
{
|
|
|
|
if (INTEL_INFO(dev_priv->dev)->gen < 6) {
|
|
|
|
intel_gtt_chipset_flush();
|
|
|
|
} else {
|
|
|
|
I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
|
|
|
|
POSTING_READ(GFX_FLSH_CNTL_GEN6);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2013-10-17 00:21:30 +08:00
|
|
|
void i915_gem_suspend_gtt_mappings(struct drm_device *dev)
|
|
|
|
{
|
|
|
|
struct drm_i915_private *dev_priv = dev->dev_private;
|
|
|
|
|
|
|
|
/* Don't bother messing with faults pre GEN6 as we have little
|
|
|
|
* documentation supporting that it's a good idea.
|
|
|
|
*/
|
|
|
|
if (INTEL_INFO(dev)->gen < 6)
|
|
|
|
return;
|
|
|
|
|
|
|
|
i915_check_and_clear_faults(dev);
|
|
|
|
|
|
|
|
dev_priv->gtt.base.clear_range(&dev_priv->gtt.base,
|
2014-02-21 03:50:33 +08:00
|
|
|
dev_priv->gtt.base.start,
|
|
|
|
dev_priv->gtt.base.total,
|
2014-03-27 03:08:20 +08:00
|
|
|
true);
|
2014-09-25 17:13:12 +08:00
|
|
|
|
|
|
|
i915_ggtt_flush(dev_priv);
|
2013-10-17 00:21:30 +08:00
|
|
|
}
|
|
|
|
|
2012-02-16 06:50:21 +08:00
|
|
|
int i915_gem_gtt_prepare_object(struct drm_i915_gem_object *obj)
|
2010-11-06 17:10:47 +08:00
|
|
|
{
|
2012-06-01 22:20:22 +08:00
|
|
|
if (obj->has_dma_mapping)
|
2012-02-16 06:50:21 +08:00
|
|
|
return 0;
|
2012-06-01 22:20:22 +08:00
|
|
|
|
|
|
|
if (!dma_map_sg(&obj->base.dev->pdev->dev,
|
|
|
|
obj->pages->sgl, obj->pages->nents,
|
|
|
|
PCI_DMA_BIDIRECTIONAL))
|
|
|
|
return -ENOSPC;
|
|
|
|
|
|
|
|
return 0;
|
2010-11-06 17:10:47 +08:00
|
|
|
}
|
|
|
|
|
2015-04-14 23:35:26 +08:00
|
|
|
/*
 * Write one gen8 PTE to @addr.
 *
 * Uses a single writeq() when the platform provides one; otherwise the
 * entry is written as two 32-bit halves, low dword first.
 */
static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte)
{
#ifndef writeq
	iowrite32((u32)pte, addr);
	iowrite32(pte >> 32, addr + 4);
#else
	writeq(pte, addr);
#endif
}
|
|
|
|
|
|
|
|
static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
|
|
|
|
struct sg_table *st,
|
2014-02-21 03:50:33 +08:00
|
|
|
uint64_t start,
|
2014-06-17 13:29:42 +08:00
|
|
|
enum i915_cache_level level, u32 unused)
|
2013-11-03 12:07:18 +08:00
|
|
|
{
|
|
|
|
struct drm_i915_private *dev_priv = vm->dev->dev_private;
|
2014-02-21 03:50:33 +08:00
|
|
|
unsigned first_entry = start >> PAGE_SHIFT;
|
2015-03-17 00:00:54 +08:00
|
|
|
gen8_pte_t __iomem *gtt_entries =
|
|
|
|
(gen8_pte_t __iomem *)dev_priv->gtt.gsm + first_entry;
|
2013-11-03 12:07:18 +08:00
|
|
|
int i = 0;
|
|
|
|
struct sg_page_iter sg_iter;
|
2014-07-28 19:20:58 +08:00
|
|
|
dma_addr_t addr = 0; /* shut up gcc */
|
2013-11-03 12:07:18 +08:00
|
|
|
|
|
|
|
for_each_sg_page(st->sgl, &sg_iter, st->nents, 0) {
|
|
|
|
addr = sg_dma_address(sg_iter.sg) +
|
|
|
|
(sg_iter.sg_pgoffset << PAGE_SHIFT);
|
|
|
|
gen8_set_pte(>t_entries[i],
|
|
|
|
gen8_pte_encode(addr, level, true));
|
|
|
|
i++;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* XXX: This serves as a posting read to make sure that the PTE has
|
|
|
|
* actually been updated. There is some concern that even though
|
|
|
|
* registers and PTEs are within the same BAR that they are potentially
|
|
|
|
* of NUMA access patterns. Therefore, even with the way we assume
|
|
|
|
* hardware should work, we must keep this posting read for paranoia.
|
|
|
|
*/
|
|
|
|
if (i != 0)
|
|
|
|
WARN_ON(readq(>t_entries[i-1])
|
|
|
|
!= gen8_pte_encode(addr, level, true));
|
|
|
|
|
|
|
|
/* This next bit makes the above posting read even more important. We
|
|
|
|
* want to flush the TLBs only after we're certain all the PTE updates
|
|
|
|
* have finished.
|
|
|
|
*/
|
|
|
|
I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
|
|
|
|
POSTING_READ(GFX_FLSH_CNTL_GEN6);
|
|
|
|
}
|
|
|
|
|
2012-11-05 01:21:27 +08:00
|
|
|
/*
|
|
|
|
* Binds an object into the global gtt with the specified cache level. The object
|
|
|
|
* will be accessible to the GPU via commands whose operands reference offsets
|
|
|
|
* within the global GTT as well as accessible by the GPU through the GMADR
|
|
|
|
* mapped BAR (dev_priv->mm.gtt->gtt).
|
|
|
|
*/
|
2013-07-17 07:50:05 +08:00
|
|
|
static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
|
2013-01-25 06:44:55 +08:00
|
|
|
struct sg_table *st,
|
2014-02-21 03:50:33 +08:00
|
|
|
uint64_t start,
|
2014-06-17 13:29:42 +08:00
|
|
|
enum i915_cache_level level, u32 flags)
|
2012-11-05 01:21:27 +08:00
|
|
|
{
|
2013-07-17 07:50:05 +08:00
|
|
|
struct drm_i915_private *dev_priv = vm->dev->dev_private;
|
2014-02-21 03:50:33 +08:00
|
|
|
unsigned first_entry = start >> PAGE_SHIFT;
|
2015-03-17 00:00:54 +08:00
|
|
|
gen6_pte_t __iomem *gtt_entries =
|
|
|
|
(gen6_pte_t __iomem *)dev_priv->gtt.gsm + first_entry;
|
2013-02-19 01:28:04 +08:00
|
|
|
int i = 0;
|
|
|
|
struct sg_page_iter sg_iter;
|
2014-07-28 19:20:58 +08:00
|
|
|
dma_addr_t addr = 0;
|
2012-11-05 01:21:27 +08:00
|
|
|
|
2013-02-19 01:28:04 +08:00
|
|
|
for_each_sg_page(st->sgl, &sg_iter, st->nents, 0) {
|
2013-03-26 21:14:18 +08:00
|
|
|
addr = sg_page_iter_dma_address(&sg_iter);
|
2014-06-17 13:29:42 +08:00
|
|
|
iowrite32(vm->pte_encode(addr, level, true, flags), >t_entries[i]);
|
2013-02-19 01:28:04 +08:00
|
|
|
i++;
|
2012-11-05 01:21:27 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/* XXX: This serves as a posting read to make sure that the PTE has
|
|
|
|
* actually been updated. There is some concern that even though
|
|
|
|
* registers and PTEs are within the same BAR that they are potentially
|
|
|
|
* of NUMA access patterns. Therefore, even with the way we assume
|
|
|
|
* hardware should work, we must keep this posting read for paranoia.
|
|
|
|
*/
|
2014-07-28 19:20:58 +08:00
|
|
|
if (i != 0) {
|
|
|
|
unsigned long gtt = readl(>t_entries[i-1]);
|
|
|
|
WARN_ON(gtt != vm->pte_encode(addr, level, true, flags));
|
|
|
|
}
|
2012-11-05 01:21:30 +08:00
|
|
|
|
|
|
|
/* This next bit makes the above posting read even more important. We
|
|
|
|
* want to flush the TLBs only after we're certain all the PTE updates
|
|
|
|
* have finished.
|
|
|
|
*/
|
|
|
|
I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
|
|
|
|
POSTING_READ(GFX_FLSH_CNTL_GEN6);
|
2012-11-05 01:21:27 +08:00
|
|
|
}
|
|
|
|
|
2013-11-03 12:07:18 +08:00
|
|
|
static void gen8_ggtt_clear_range(struct i915_address_space *vm,
|
2014-02-21 03:50:33 +08:00
|
|
|
uint64_t start,
|
|
|
|
uint64_t length,
|
2013-11-03 12:07:18 +08:00
|
|
|
bool use_scratch)
|
|
|
|
{
|
|
|
|
struct drm_i915_private *dev_priv = vm->dev->dev_private;
|
2014-02-21 03:50:33 +08:00
|
|
|
unsigned first_entry = start >> PAGE_SHIFT;
|
|
|
|
unsigned num_entries = length >> PAGE_SHIFT;
|
2015-03-17 00:00:54 +08:00
|
|
|
gen8_pte_t scratch_pte, __iomem *gtt_base =
|
|
|
|
(gen8_pte_t __iomem *) dev_priv->gtt.gsm + first_entry;
|
2013-11-03 12:07:18 +08:00
|
|
|
const int max_entries = gtt_total_entries(dev_priv->gtt) - first_entry;
|
|
|
|
int i;
|
|
|
|
|
|
|
|
if (WARN(num_entries > max_entries,
|
|
|
|
"First entry = %d; Num entries = %d (max=%d)\n",
|
|
|
|
first_entry, num_entries, max_entries))
|
|
|
|
num_entries = max_entries;
|
|
|
|
|
|
|
|
scratch_pte = gen8_pte_encode(vm->scratch.addr,
|
|
|
|
I915_CACHE_LLC,
|
|
|
|
use_scratch);
|
|
|
|
for (i = 0; i < num_entries; i++)
|
|
|
|
gen8_set_pte(>t_base[i], scratch_pte);
|
|
|
|
readl(gtt_base);
|
|
|
|
}
|
|
|
|
|
2013-07-17 07:50:05 +08:00
|
|
|
static void gen6_ggtt_clear_range(struct i915_address_space *vm,
|
2014-02-21 03:50:33 +08:00
|
|
|
uint64_t start,
|
|
|
|
uint64_t length,
|
2013-10-17 00:21:30 +08:00
|
|
|
bool use_scratch)
|
2013-01-25 06:44:55 +08:00
|
|
|
{
|
2013-07-17 07:50:05 +08:00
|
|
|
struct drm_i915_private *dev_priv = vm->dev->dev_private;
|
2014-02-21 03:50:33 +08:00
|
|
|
unsigned first_entry = start >> PAGE_SHIFT;
|
|
|
|
unsigned num_entries = length >> PAGE_SHIFT;
|
2015-03-17 00:00:54 +08:00
|
|
|
gen6_pte_t scratch_pte, __iomem *gtt_base =
|
|
|
|
(gen6_pte_t __iomem *) dev_priv->gtt.gsm + first_entry;
|
2013-01-25 06:45:00 +08:00
|
|
|
const int max_entries = gtt_total_entries(dev_priv->gtt) - first_entry;
|
2013-01-25 06:44:55 +08:00
|
|
|
int i;
|
|
|
|
|
|
|
|
if (WARN(num_entries > max_entries,
|
|
|
|
"First entry = %d; Num entries = %d (max=%d)\n",
|
|
|
|
first_entry, num_entries, max_entries))
|
|
|
|
num_entries = max_entries;
|
|
|
|
|
2014-06-17 13:29:42 +08:00
|
|
|
scratch_pte = vm->pte_encode(vm->scratch.addr, I915_CACHE_LLC, use_scratch, 0);
|
2013-10-17 00:21:30 +08:00
|
|
|
|
2013-01-25 06:44:55 +08:00
|
|
|
for (i = 0; i < num_entries; i++)
|
|
|
|
iowrite32(scratch_pte, >t_base[i]);
|
|
|
|
readl(gtt_base);
|
|
|
|
}
|
|
|
|
|
2015-04-14 23:35:25 +08:00
|
|
|
static void i915_ggtt_insert_entries(struct i915_address_space *vm,
|
|
|
|
struct sg_table *pages,
|
|
|
|
uint64_t start,
|
|
|
|
enum i915_cache_level cache_level, u32 unused)
|
2013-01-25 06:44:55 +08:00
|
|
|
{
|
|
|
|
unsigned int flags = (cache_level == I915_CACHE_NONE) ?
|
|
|
|
AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;
|
|
|
|
|
2015-04-14 23:35:25 +08:00
|
|
|
intel_gtt_insert_sg_entries(pages, start >> PAGE_SHIFT, flags);
|
2015-04-21 00:04:05 +08:00
|
|
|
|
2013-01-25 06:44:55 +08:00
|
|
|
}
|
|
|
|
|
2013-07-17 07:50:05 +08:00
|
|
|
static void i915_ggtt_clear_range(struct i915_address_space *vm,
|
2014-02-21 03:50:33 +08:00
|
|
|
uint64_t start,
|
|
|
|
uint64_t length,
|
2013-10-17 00:21:30 +08:00
|
|
|
bool unused)
|
2013-01-25 06:44:55 +08:00
|
|
|
{
|
2014-02-21 03:50:33 +08:00
|
|
|
unsigned first_entry = start >> PAGE_SHIFT;
|
|
|
|
unsigned num_entries = length >> PAGE_SHIFT;
|
2013-01-25 06:44:55 +08:00
|
|
|
intel_gtt_clear_range(first_entry, num_entries);
|
|
|
|
}
|
|
|
|
|
2015-04-14 23:35:27 +08:00
|
|
|
static int ggtt_bind_vma(struct i915_vma *vma,
|
|
|
|
enum i915_cache_level cache_level,
|
|
|
|
u32 flags)
|
2011-04-14 13:48:26 +08:00
|
|
|
{
|
drm/i915: Create bind/unbind abstraction for VMAs
To sum up what goes on here, we abstract the vma binding, similarly to
the previous object binding. This helps for distinguishing legacy
binding, versus modern binding. To keep the code churn as minimal as
possible, I am leaving in insert_entries(). It serves as the per
platform pte writing basically. bind_vma and insert_entries do share a
lot of similarities, and I did have designs to combine the two, but as
mentioned already... too much churn in an already massive patchset.
What follows are the 3 commits which existed discretely in the original
submissions. Upon rebasing on Broadwell support, it became clear that
separation was not good, and only made for more error prone code. Below
are the 3 commit messages with all their history.
drm/i915: Add bind/unbind object functions to VMA
drm/i915: Use the new vm [un]bind functions
drm/i915: reduce vm->insert_entries() usage
drm/i915: Add bind/unbind object functions to VMA
As we plumb the code with more VM information, it has become more
obvious that the easiest way to deal with bind and unbind is to simply
put the function pointers in the vm, and let those choose the correct
way to handle the page table updates. This change allows many places in
the code to simply be vm->bind, and not have to worry about
distinguishing PPGTT vs GGTT.
Notice that this patch has no impact on functionality. I've decided to
save the actual change until the next patch because I think it's easier
to review that way. I'm happy to squash the two, or let Daniel do it on
merge.
v2:
Make ggtt handle the quirky aliasing ppgtt
Add flags to bind object to support above
Don't ever call bind/unbind directly for PPGTT until we have real, full
PPGTT (use NULLs to assert this)
Make sure we rebind the ggtt if there already is a ggtt binding. This
happens on set cache levels.
Use VMA for bind/unbind (Daniel, Ben)
v3: Reorganize ggtt_vma_bind to be more concise and easier to read
(Ville). Change logic in unbind to only unbind ggtt when there is a
global mapping, and to remove a redundant check if the aliasing ppgtt
exists.
v4: Make the bind function a bit smarter about the cache levels to avoid
unnecessary multiple remaps. "I accept it is a wart, I think unifying
the pin_vma / bind_vma could be unified later" (Chris)
Removed the git notes, and put version info here. (Daniel)
v5: Update the comment to not suck (Chris)
v6:
Move bind/unbind to the VMA. It makes more sense in the VMA structure
(always has, but I was previously lazy). With this change, it will allow
us to keep a distinct insert_entries.
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
drm/i915: Use the new vm [un]bind functions
Building on the last patch which created the new function pointers in
the VM for bind/unbind, here we actually put those new function pointers
to use.
Split out as a separate patch to aid in review. I'm fine with squashing
into the previous patch if people request it.
v2: Updated to address the smart ggtt which can do aliasing as needed
Make sure we bind to global gtt when mappable and fenceable. I thought
we could get away without this initially, but we cannot.
v3: Make the global GTT binding explicitly use the ggtt VM for
bind_vma(). While at it, use the new ggtt_vma helper (Chris)
At this point the original mailing list thread diverges. ie.
v4^:
use target_obj instead of obj for gen6 relocate_entry
vma->bind_vma() can be called safely during pin. So simply do that
instead of the complicated conditionals.
Don't restore PPGTT bound objects on resume path
Bug fix in resume path for globally bound Bos
Properly handle secure dispatch
Rebased on vma bind/unbind conversion
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
drm/i915: reduce vm->insert_entries() usage
FKA: drm/i915: eliminate vm->insert_entries()
With bind/unbind function pointers in place, we no longer need
insert_entries. We could, and want, to remove clear_range, however it's
not totally easy at this point. Since it's used in a couple of place
still that don't only deal in objects: setup, ppgtt init, and restore
gtt mappings.
v2: Don't actually remove insert_entries, just limit its usage. It will
be useful when we introduce gen8. It will always be called from the vma
bind/unbind.
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> (v1)
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2013-12-07 06:10:56 +08:00
|
|
|
struct drm_device *dev = vma->vm->dev;
|
2013-01-25 06:44:55 +08:00
|
|
|
struct drm_i915_private *dev_priv = dev->dev_private;
|
drm/i915: Create bind/unbind abstraction for VMAs
To sum up what goes on here, we abstract the vma binding, similarly to
the previous object binding. This helps for distinguishing legacy
binding, versus modern binding. To keep the code churn as minimal as
possible, I am leaving in insert_entries(). It serves as the per
platform pte writing basically. bind_vma and insert_entries do share a
lot of similarities, and I did have designs to combine the two, but as
mentioned already... too much churn in an already massive patchset.
What follows are the 3 commits which existed discretely in the original
submissions. Upon rebasing on Broadwell support, it became clear that
separation was not good, and only made for more error prone code. Below
are the 3 commit messages with all their history.
drm/i915: Add bind/unbind object functions to VMA
drm/i915: Use the new vm [un]bind functions
drm/i915: reduce vm->insert_entries() usage
drm/i915: Add bind/unbind object functions to VMA
As we plumb the code with more VM information, it has become more
obvious that the easiest way to deal with bind and unbind is to simply
put the function pointers in the vm, and let those choose the correct
way to handle the page table updates. This change allows many places in
the code to simply be vm->bind, and not have to worry about
distinguishing PPGTT vs GGTT.
Notice that this patch has no impact on functionality. I've decided to
save the actual change until the next patch because I think it's easier
to review that way. I'm happy to squash the two, or let Daniel do it on
merge.
v2:
Make ggtt handle the quirky aliasing ppgtt
Add flags to bind object to support above
Don't ever call bind/unbind directly for PPGTT until we have real, full
PPGTT (use NULLs to assert this)
Make sure we rebind the ggtt if there already is a ggtt binding. This
happens on set cache levels.
Use VMA for bind/unbind (Daniel, Ben)
v3: Reorganize ggtt_vma_bind to be more concise and easier to read
(Ville). Change logic in unbind to only unbind ggtt when there is a
global mapping, and to remove a redundant check if the aliasing ppgtt
exists.
v4: Make the bind function a bit smarter about the cache levels to avoid
unnecessary multiple remaps. "I accept it is a wart, I think unifying
the pin_vma / bind_vma could be unified later" (Chris)
Removed the git notes, and put version info here. (Daniel)
v5: Update the comment to not suck (Chris)
v6:
Move bind/unbind to the VMA. It makes more sense in the VMA structure
(always has, but I was previously lazy). With this change, it will allow
us to keep a distinct insert_entries.
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
drm/i915: Use the new vm [un]bind functions
Building on the last patch which created the new function pointers in
the VM for bind/unbind, here we actually put those new function pointers
to use.
Split out as a separate patch to aid in review. I'm fine with squashing
into the previous patch if people request it.
v2: Updated to address the smart ggtt which can do aliasing as needed
Make sure we bind to global gtt when mappable and fenceable. I thought
we could get away without this initially, but we cannot.
v3: Make the global GTT binding explicitly use the ggtt VM for
bind_vma(). While at it, use the new ggtt_vma helper (Chris)
At this point the original mailing list thread diverges. ie.
v4^:
use target_obj instead of obj for gen6 relocate_entry
vma->bind_vma() can be called safely during pin. So simply do that
instead of the complicated conditionals.
Don't restore PPGTT bound objects on resume path
Bug fix in resume path for globally bound Bos
Properly handle secure dispatch
Rebased on vma bind/unbind conversion
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
drm/i915: reduce vm->insert_entries() usage
FKA: drm/i915: eliminate vm->insert_entries()
With bind/unbind function pointers in place, we no longer need
insert_entries. We could, and want, to remove clear_range, however it's
not totally easy at this point. Since it's used in a couple of place
still that don't only deal in objects: setup, ppgtt init, and restore
gtt mappings.
v2: Don't actually remove insert_entries, just limit its usage. It will
be useful when we introduce gen8. It will always be called from the vma
bind/unbind.
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> (v1)
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2013-12-07 06:10:56 +08:00
|
|
|
struct drm_i915_gem_object *obj = vma->obj;
|
2015-03-16 20:11:13 +08:00
|
|
|
struct sg_table *pages = obj->pages;
|
2015-04-14 23:35:15 +08:00
|
|
|
u32 pte_flags = 0;
|
2015-04-14 23:35:27 +08:00
|
|
|
int ret;
|
|
|
|
|
|
|
|
ret = i915_get_ggtt_vma_pages(vma);
|
|
|
|
if (ret)
|
|
|
|
return ret;
|
|
|
|
pages = vma->ggtt_view.pages;
|
2013-01-25 06:44:55 +08:00
|
|
|
|
2014-06-17 13:29:42 +08:00
|
|
|
/* Currently applicable only to VLV */
|
|
|
|
if (obj->gt_ro)
|
2015-04-14 23:35:15 +08:00
|
|
|
pte_flags |= PTE_READ_ONLY;
|
2014-06-17 13:29:42 +08:00
|
|
|
|
2015-03-16 20:11:13 +08:00
|
|
|
|
drm/i915: Create bind/unbind abstraction for VMAs
To sum up what goes on here, we abstract the vma binding, similarly to
the previous object binding. This helps for distinguishing legacy
binding, versus modern binding. To keep the code churn as minimal as
possible, I am leaving in insert_entries(). It serves as the per
platform pte writing basically. bind_vma and insert_entries do share a
lot of similarities, and I did have designs to combine the two, but as
mentioned already... too much churn in an already massive patchset.
What follows are the 3 commits which existed discretely in the original
submissions. Upon rebasing on Broadwell support, it became clear that
separation was not good, and only made for more error prone code. Below
are the 3 commit messages with all their history.
drm/i915: Add bind/unbind object functions to VMA
drm/i915: Use the new vm [un]bind functions
drm/i915: reduce vm->insert_entries() usage
drm/i915: Add bind/unbind object functions to VMA
As we plumb the code with more VM information, it has become more
obvious that the easiest way to deal with bind and unbind is to simply
put the function pointers in the vm, and let those choose the correct
way to handle the page table updates. This change allows many places in
the code to simply be vm->bind, and not have to worry about
distinguishing PPGTT vs GGTT.
Notice that this patch has no impact on functionality. I've decided to
save the actual change until the next patch because I think it's easier
to review that way. I'm happy to squash the two, or let Daniel do it on
merge.
v2:
Make ggtt handle the quirky aliasing ppgtt
Add flags to bind object to support above
Don't ever call bind/unbind directly for PPGTT until we have real, full
PPGTT (use NULLs to assert this)
Make sure we rebind the ggtt if there already is a ggtt binding. This
happens on set cache levels.
Use VMA for bind/unbind (Daniel, Ben)
v3: Reorganize ggtt_vma_bind to be more concise and easier to read
(Ville). Change logic in unbind to only unbind ggtt when there is a
global mapping, and to remove a redundant check if the aliasing ppgtt
exists.
v4: Make the bind function a bit smarter about the cache levels to avoid
unnecessary multiple remaps. "I accept it is a wart, I think unifying
the pin_vma / bind_vma could be unified later" (Chris)
Removed the git notes, and put version info here. (Daniel)
v5: Update the comment to not suck (Chris)
v6:
Move bind/unbind to the VMA. It makes more sense in the VMA structure
(always has, but I was previously lazy). With this change, it will allow
us to keep a distinct insert_entries.
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
drm/i915: Use the new vm [un]bind functions
Building on the last patch which created the new function pointers in
the VM for bind/unbind, here we actually put those new function pointers
to use.
Split out as a separate patch to aid in review. I'm fine with squashing
into the previous patch if people request it.
v2: Updated to address the smart ggtt which can do aliasing as needed
Make sure we bind to global gtt when mappable and fenceable. I thought
we could get away without this initially, but we cannot.
v3: Make the global GTT binding explicitly use the ggtt VM for
bind_vma(). While at it, use the new ggtt_vma helper (Chris)
At this point the original mailing list thread diverges. ie.
v4^:
use target_obj instead of obj for gen6 relocate_entry
vma->bind_vma() can be called safely during pin. So simply do that
instead of the complicated conditionals.
Don't restore PPGTT bound objects on resume path
Bug fix in resume path for globally bound Bos
Properly handle secure dispatch
Rebased on vma bind/unbind conversion
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
drm/i915: reduce vm->insert_entries() usage
FKA: drm/i915: eliminate vm->insert_entries()
With bind/unbind function pointers in place, we no longer need
insert_entries. We could, and want to, remove clear_range; however, it's
not totally easy at this point, since it's still used in a couple of
places that don't deal only in objects: setup, ppgtt init, and restore
gtt mappings.
v2: Don't actually remove insert_entries, just limit its usage. It will
be useful when we introduce gen8. It will always be called from the vma
bind/unbind.
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> (v1)
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2013-12-07 06:10:56 +08:00
|
|
|
if (!dev_priv->mm.aliasing_ppgtt || flags & GLOBAL_BIND) {
|
2015-04-21 00:04:05 +08:00
|
|
|
vma->vm->insert_entries(vma->vm, pages,
|
|
|
|
vma->node.start,
|
|
|
|
cache_level, pte_flags);
|
drm/i915: Create bind/unbind abstraction for VMAs
To sum up what goes on here, we abstract the vma binding, similarly to
the previous object binding. This helps for distinguishing legacy
binding, versus modern binding. To keep the code churn as minimal as
possible, I am leaving in insert_entries(). It serves as the per
platform pte writing basically. bind_vma and insert_entries do share a
lot of similarities, and I did have designs to combine the two, but as
mentioned already... too much churn in an already massive patchset.
What follows are the 3 commits which existed discretely in the original
submissions. Upon rebasing on Broadwell support, it became clear that
separation was not good, and only made for more error prone code. Below
are the 3 commit messages with all their history.
drm/i915: Add bind/unbind object functions to VMA
drm/i915: Use the new vm [un]bind functions
drm/i915: reduce vm->insert_entries() usage
drm/i915: Add bind/unbind object functions to VMA
As we plumb the code with more VM information, it has become more
obvious that the easiest way to deal with bind and unbind is to simply
put the function pointers in the vm, and let those choose the correct
way to handle the page table updates. This change allows many places in
the code to simply be vm->bind, and not have to worry about
distinguishing PPGTT vs GGTT.
Notice that this patch has no impact on functionality. I've decided to
save the actual change until the next patch because I think it's easier
to review that way. I'm happy to squash the two, or let Daniel do it on
merge.
v2:
Make ggtt handle the quirky aliasing ppgtt
Add flags to bind object to support above
Don't ever call bind/unbind directly for PPGTT until we have real, full
PPGTT (use NULLs to assert this)
Make sure we rebind the ggtt if there already is a ggtt binding. This
happens on set cache levels.
Use VMA for bind/unbind (Daniel, Ben)
v3: Reorganize ggtt_vma_bind to be more concise and easier to read
(Ville). Change logic in unbind to only unbind ggtt when there is a
global mapping, and to remove a redundant check if the aliasing ppgtt
exists.
v4: Make the bind function a bit smarter about the cache levels to avoid
unnecessary multiple remaps. "I accept it is a wart, I think unifying
the pin_vma / bind_vma could be unified later" (Chris)
Removed the git notes, and put version info here. (Daniel)
v5: Update the comment to not suck (Chris)
v6:
Move bind/unbind to the VMA. It makes more sense in the VMA structure
(always has, but I was previously lazy). With this change, it will allow
us to keep a distinct insert_entries.
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
drm/i915: Use the new vm [un]bind functions
Building on the last patch which created the new function pointers in
the VM for bind/unbind, here we actually put those new function pointers
to use.
Split out as a separate patch to aid in review. I'm fine with squashing
into the previous patch if people request it.
v2: Updated to address the smart ggtt which can do aliasing as needed
Make sure we bind to global gtt when mappable and fenceable. I thought
we could get away without this initially, but we cannot.
v3: Make the global GTT binding explicitly use the ggtt VM for
bind_vma(). While at it, use the new ggtt_vma helper (Chris)
At this point the original mailing list thread diverges. ie.
v4^:
use target_obj instead of obj for gen6 relocate_entry
vma->bind_vma() can be called safely during pin. So simply do that
instead of the complicated conditionals.
Don't restore PPGTT bound objects on resume path
Bug fix in resume path for globally bound Bos
Properly handle secure dispatch
Rebased on vma bind/unbind conversion
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
drm/i915: reduce vm->insert_entries() usage
FKA: drm/i915: eliminate vm->insert_entries()
With bind/unbind function pointers in place, we no longer need
insert_entries. We could, and want to, remove clear_range; however, it's
not totally easy at this point, since it's still used in a couple of
places that don't deal only in objects: setup, ppgtt init, and restore
gtt mappings.
v2: Don't actually remove insert_entries, just limit its usage. It will
be useful when we introduce gen8. It will always be called from the vma
bind/unbind.
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> (v1)
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2013-12-07 06:10:56 +08:00
|
|
|
}
|
2011-04-14 13:48:26 +08:00
|
|
|
|
2015-04-21 00:04:05 +08:00
|
|
|
if (dev_priv->mm.aliasing_ppgtt && flags & LOCAL_BIND) {
|
drm/i915: Create bind/unbind abstraction for VMAs
To sum up what goes on here, we abstract the vma binding, similarly to
the previous object binding. This helps for distinguishing legacy
binding, versus modern binding. To keep the code churn as minimal as
possible, I am leaving in insert_entries(). It serves as the per
platform pte writing basically. bind_vma and insert_entries do share a
lot of similarities, and I did have designs to combine the two, but as
mentioned already... too much churn in an already massive patchset.
What follows are the 3 commits which existed discretely in the original
submissions. Upon rebasing on Broadwell support, it became clear that
separation was not good, and only made for more error prone code. Below
are the 3 commit messages with all their history.
drm/i915: Add bind/unbind object functions to VMA
drm/i915: Use the new vm [un]bind functions
drm/i915: reduce vm->insert_entries() usage
drm/i915: Add bind/unbind object functions to VMA
As we plumb the code with more VM information, it has become more
obvious that the easiest way to deal with bind and unbind is to simply
put the function pointers in the vm, and let those choose the correct
way to handle the page table updates. This change allows many places in
the code to simply be vm->bind, and not have to worry about
distinguishing PPGTT vs GGTT.
Notice that this patch has no impact on functionality. I've decided to
save the actual change until the next patch because I think it's easier
to review that way. I'm happy to squash the two, or let Daniel do it on
merge.
v2:
Make ggtt handle the quirky aliasing ppgtt
Add flags to bind object to support above
Don't ever call bind/unbind directly for PPGTT until we have real, full
PPGTT (use NULLs to assert this)
Make sure we rebind the ggtt if there already is a ggtt binding. This
happens on set cache levels.
Use VMA for bind/unbind (Daniel, Ben)
v3: Reorganize ggtt_vma_bind to be more concise and easier to read
(Ville). Change logic in unbind to only unbind ggtt when there is a
global mapping, and to remove a redundant check if the aliasing ppgtt
exists.
v4: Make the bind function a bit smarter about the cache levels to avoid
unnecessary multiple remaps. "I accept it is a wart, I think unifying
the pin_vma / bind_vma could be unified later" (Chris)
Removed the git notes, and put version info here. (Daniel)
v5: Update the comment to not suck (Chris)
v6:
Move bind/unbind to the VMA. It makes more sense in the VMA structure
(always has, but I was previously lazy). With this change, it will allow
us to keep a distinct insert_entries.
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
drm/i915: Use the new vm [un]bind functions
Building on the last patch which created the new function pointers in
the VM for bind/unbind, here we actually put those new function pointers
to use.
Split out as a separate patch to aid in review. I'm fine with squashing
into the previous patch if people request it.
v2: Updated to address the smart ggtt which can do aliasing as needed
Make sure we bind to global gtt when mappable and fenceable. I thought
we could get away without this initially, but we cannot.
v3: Make the global GTT binding explicitly use the ggtt VM for
bind_vma(). While at it, use the new ggtt_vma helper (Chris)
At this point the original mailing list thread diverges. ie.
v4^:
use target_obj instead of obj for gen6 relocate_entry
vma->bind_vma() can be called safely during pin. So simply do that
instead of the complicated conditionals.
Don't restore PPGTT bound objects on resume path
Bug fix in resume path for globally bound Bos
Properly handle secure dispatch
Rebased on vma bind/unbind conversion
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
drm/i915: reduce vm->insert_entries() usage
FKA: drm/i915: eliminate vm->insert_entries()
With bind/unbind function pointers in place, we no longer need
insert_entries. We could, and want to, remove clear_range; however, it's
not totally easy at this point, since it's still used in a couple of
places that don't deal only in objects: setup, ppgtt init, and restore
gtt mappings.
v2: Don't actually remove insert_entries, just limit its usage. It will
be useful when we introduce gen8. It will always be called from the vma
bind/unbind.
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> (v1)
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2013-12-07 06:10:56 +08:00
|
|
|
struct i915_hw_ppgtt *appgtt = dev_priv->mm.aliasing_ppgtt;
|
2015-03-16 20:11:13 +08:00
|
|
|
appgtt->base.insert_entries(&appgtt->base, pages,
|
2014-02-21 03:50:33 +08:00
|
|
|
vma->node.start,
|
2015-04-14 23:35:15 +08:00
|
|
|
cache_level, pte_flags);
|
drm/i915: Create bind/unbind abstraction for VMAs
To sum up what goes on here, we abstract the vma binding, similarly to
the previous object binding. This helps for distinguishing legacy
binding, versus modern binding. To keep the code churn as minimal as
possible, I am leaving in insert_entries(). It serves as the per
platform pte writing basically. bind_vma and insert_entries do share a
lot of similarities, and I did have designs to combine the two, but as
mentioned already... too much churn in an already massive patchset.
What follows are the 3 commits which existed discretely in the original
submissions. Upon rebasing on Broadwell support, it became clear that
separation was not good, and only made for more error prone code. Below
are the 3 commit messages with all their history.
drm/i915: Add bind/unbind object functions to VMA
drm/i915: Use the new vm [un]bind functions
drm/i915: reduce vm->insert_entries() usage
drm/i915: Add bind/unbind object functions to VMA
As we plumb the code with more VM information, it has become more
obvious that the easiest way to deal with bind and unbind is to simply
put the function pointers in the vm, and let those choose the correct
way to handle the page table updates. This change allows many places in
the code to simply be vm->bind, and not have to worry about
distinguishing PPGTT vs GGTT.
Notice that this patch has no impact on functionality. I've decided to
save the actual change until the next patch because I think it's easier
to review that way. I'm happy to squash the two, or let Daniel do it on
merge.
v2:
Make ggtt handle the quirky aliasing ppgtt
Add flags to bind object to support above
Don't ever call bind/unbind directly for PPGTT until we have real, full
PPGTT (use NULLs to assert this)
Make sure we rebind the ggtt if there already is a ggtt binding. This
happens on set cache levels.
Use VMA for bind/unbind (Daniel, Ben)
v3: Reorganize ggtt_vma_bind to be more concise and easier to read
(Ville). Change logic in unbind to only unbind ggtt when there is a
global mapping, and to remove a redundant check if the aliasing ppgtt
exists.
v4: Make the bind function a bit smarter about the cache levels to avoid
unnecessary multiple remaps. "I accept it is a wart, I think unifying
the pin_vma / bind_vma could be unified later" (Chris)
Removed the git notes, and put version info here. (Daniel)
v5: Update the comment to not suck (Chris)
v6:
Move bind/unbind to the VMA. It makes more sense in the VMA structure
(always has, but I was previously lazy). With this change, it will allow
us to keep a distinct insert_entries.
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
drm/i915: Use the new vm [un]bind functions
Building on the last patch which created the new function pointers in
the VM for bind/unbind, here we actually put those new function pointers
to use.
Split out as a separate patch to aid in review. I'm fine with squashing
into the previous patch if people request it.
v2: Updated to address the smart ggtt which can do aliasing as needed
Make sure we bind to global gtt when mappable and fenceable. I thought
we could get away without this initially, but we cannot.
v3: Make the global GTT binding explicitly use the ggtt VM for
bind_vma(). While at it, use the new ggtt_vma helper (Chris)
At this point the original mailing list thread diverges. ie.
v4^:
use target_obj instead of obj for gen6 relocate_entry
vma->bind_vma() can be called safely during pin. So simply do that
instead of the complicated conditionals.
Don't restore PPGTT bound objects on resume path
Bug fix in resume path for globally bound Bos
Properly handle secure dispatch
Rebased on vma bind/unbind conversion
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
drm/i915: reduce vm->insert_entries() usage
FKA: drm/i915: eliminate vm->insert_entries()
With bind/unbind function pointers in place, we no longer need
insert_entries. We could, and want to, remove clear_range; however, it's
not totally easy at this point, since it's still used in a couple of
places that don't deal only in objects: setup, ppgtt init, and restore
gtt mappings.
v2: Don't actually remove insert_entries, just limit its usage. It will
be useful when we introduce gen8. It will always be called from the vma
bind/unbind.
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> (v1)
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2013-12-07 06:10:56 +08:00
|
|
|
}
|
2015-04-14 23:35:27 +08:00
|
|
|
|
|
|
|
return 0;
|
2011-04-14 13:48:26 +08:00
|
|
|
}
|
|
|
|
|
drm/i915: Create bind/unbind abstraction for VMAs
To sum up what goes on here, we abstract the vma binding, similarly to
the previous object binding. This helps for distinguishing legacy
binding, versus modern binding. To keep the code churn as minimal as
possible, I am leaving in insert_entries(). It serves as the per
platform pte writing basically. bind_vma and insert_entries do share a
lot of similarities, and I did have designs to combine the two, but as
mentioned already... too much churn in an already massive patchset.
What follows are the 3 commits which existed discretely in the original
submissions. Upon rebasing on Broadwell support, it became clear that
separation was not good, and only made for more error prone code. Below
are the 3 commit messages with all their history.
drm/i915: Add bind/unbind object functions to VMA
drm/i915: Use the new vm [un]bind functions
drm/i915: reduce vm->insert_entries() usage
drm/i915: Add bind/unbind object functions to VMA
As we plumb the code with more VM information, it has become more
obvious that the easiest way to deal with bind and unbind is to simply
put the function pointers in the vm, and let those choose the correct
way to handle the page table updates. This change allows many places in
the code to simply be vm->bind, and not have to worry about
distinguishing PPGTT vs GGTT.
Notice that this patch has no impact on functionality. I've decided to
save the actual change until the next patch because I think it's easier
to review that way. I'm happy to squash the two, or let Daniel do it on
merge.
v2:
Make ggtt handle the quirky aliasing ppgtt
Add flags to bind object to support above
Don't ever call bind/unbind directly for PPGTT until we have real, full
PPGTT (use NULLs to assert this)
Make sure we rebind the ggtt if there already is a ggtt binding. This
happens on set cache levels.
Use VMA for bind/unbind (Daniel, Ben)
v3: Reorganize ggtt_vma_bind to be more concise and easier to read
(Ville). Change logic in unbind to only unbind ggtt when there is a
global mapping, and to remove a redundant check if the aliasing ppgtt
exists.
v4: Make the bind function a bit smarter about the cache levels to avoid
unnecessary multiple remaps. "I accept it is a wart, I think unifying
the pin_vma / bind_vma could be unified later" (Chris)
Removed the git notes, and put version info here. (Daniel)
v5: Update the comment to not suck (Chris)
v6:
Move bind/unbind to the VMA. It makes more sense in the VMA structure
(always has, but I was previously lazy). With this change, it will allow
us to keep a distinct insert_entries.
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
drm/i915: Use the new vm [un]bind functions
Building on the last patch which created the new function pointers in
the VM for bind/unbind, here we actually put those new function pointers
to use.
Split out as a separate patch to aid in review. I'm fine with squashing
into the previous patch if people request it.
v2: Updated to address the smart ggtt which can do aliasing as needed
Make sure we bind to global gtt when mappable and fenceable. I thought
we could get away without this initially, but we cannot.
v3: Make the global GTT binding explicitly use the ggtt VM for
bind_vma(). While at it, use the new ggtt_vma helper (Chris)
At this point the original mailing list thread diverges. ie.
v4^:
use target_obj instead of obj for gen6 relocate_entry
vma->bind_vma() can be called safely during pin. So simply do that
instead of the complicated conditionals.
Don't restore PPGTT bound objects on resume path
Bug fix in resume path for globally bound Bos
Properly handle secure dispatch
Rebased on vma bind/unbind conversion
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
drm/i915: reduce vm->insert_entries() usage
FKA: drm/i915: eliminate vm->insert_entries()
With bind/unbind function pointers in place, we no longer need
insert_entries. We could, and want to, remove clear_range; however, it's
not totally easy at this point, since it's still used in a couple of
places that don't deal only in objects: setup, ppgtt init, and restore
gtt mappings.
v2: Don't actually remove insert_entries, just limit its usage. It will
be useful when we introduce gen8. It will always be called from the vma
bind/unbind.
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> (v1)
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2013-12-07 06:10:56 +08:00
|
|
|
static void ggtt_unbind_vma(struct i915_vma *vma)
|
2012-02-16 06:50:21 +08:00
|
|
|
{
|
drm/i915: Create bind/unbind abstraction for VMAs
To sum up what goes on here, we abstract the vma binding, similarly to
the previous object binding. This helps for distinguishing legacy
binding, versus modern binding. To keep the code churn as minimal as
possible, I am leaving in insert_entries(). It serves as the per
platform pte writing basically. bind_vma and insert_entries do share a
lot of similarities, and I did have designs to combine the two, but as
mentioned already... too much churn in an already massive patchset.
What follows are the 3 commits which existed discretely in the original
submissions. Upon rebasing on Broadwell support, it became clear that
separation was not good, and only made for more error prone code. Below
are the 3 commit messages with all their history.
drm/i915: Add bind/unbind object functions to VMA
drm/i915: Use the new vm [un]bind functions
drm/i915: reduce vm->insert_entries() usage
drm/i915: Add bind/unbind object functions to VMA
As we plumb the code with more VM information, it has become more
obvious that the easiest way to deal with bind and unbind is to simply
put the function pointers in the vm, and let those choose the correct
way to handle the page table updates. This change allows many places in
the code to simply be vm->bind, and not have to worry about
distinguishing PPGTT vs GGTT.
Notice that this patch has no impact on functionality. I've decided to
save the actual change until the next patch because I think it's easier
to review that way. I'm happy to squash the two, or let Daniel do it on
merge.
v2:
Make ggtt handle the quirky aliasing ppgtt
Add flags to bind object to support above
Don't ever call bind/unbind directly for PPGTT until we have real, full
PPGTT (use NULLs to assert this)
Make sure we rebind the ggtt if there already is a ggtt binding. This
happens on set cache levels.
Use VMA for bind/unbind (Daniel, Ben)
v3: Reorganize ggtt_vma_bind to be more concise and easier to read
(Ville). Change logic in unbind to only unbind ggtt when there is a
global mapping, and to remove a redundant check if the aliasing ppgtt
exists.
v4: Make the bind function a bit smarter about the cache levels to avoid
unnecessary multiple remaps. "I accept it is a wart, I think unifying
the pin_vma / bind_vma could be unified later" (Chris)
Removed the git notes, and put version info here. (Daniel)
v5: Update the comment to not suck (Chris)
v6:
Move bind/unbind to the VMA. It makes more sense in the VMA structure
(always has, but I was previously lazy). With this change, it will allow
us to keep a distinct insert_entries.
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
drm/i915: Use the new vm [un]bind functions
Building on the last patch which created the new function pointers in
the VM for bind/unbind, here we actually put those new function pointers
to use.
Split out as a separate patch to aid in review. I'm fine with squashing
into the previous patch if people request it.
v2: Updated to address the smart ggtt which can do aliasing as needed
Make sure we bind to global gtt when mappable and fenceable. I thought
we could get away without this initially, but we cannot.
v3: Make the global GTT binding explicitly use the ggtt VM for
bind_vma(). While at it, use the new ggtt_vma helper (Chris)
At this point the original mailing list thread diverges. ie.
v4^:
use target_obj instead of obj for gen6 relocate_entry
vma->bind_vma() can be called safely during pin. So simply do that
instead of the complicated conditionals.
Don't restore PPGTT bound objects on resume path
Bug fix in resume path for globally bound Bos
Properly handle secure dispatch
Rebased on vma bind/unbind conversion
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
drm/i915: reduce vm->insert_entries() usage
FKA: drm/i915: eliminate vm->insert_entries()
With bind/unbind function pointers in place, we no longer need
insert_entries. We could, and want to, remove clear_range; however, it's
not totally easy at this point, since it's still used in a couple of
places that don't deal only in objects: setup, ppgtt init, and restore
gtt mappings.
v2: Don't actually remove insert_entries, just limit its usage. It will
be useful when we introduce gen8. It will always be called from the vma
bind/unbind.
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> (v1)
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2013-12-07 06:10:56 +08:00
|
|
|
struct drm_device *dev = vma->vm->dev;
|
2013-01-25 06:44:55 +08:00
|
|
|
struct drm_i915_private *dev_priv = dev->dev_private;
|
drm/i915: Create bind/unbind abstraction for VMAs
To sum up what goes on here, we abstract the vma binding, similarly to
the previous object binding. This helps for distinguishing legacy
binding, versus modern binding. To keep the code churn as minimal as
possible, I am leaving in insert_entries(). It serves as the per
platform pte writing basically. bind_vma and insert_entries do share a
lot of similarities, and I did have designs to combine the two, but as
mentioned already... too much churn in an already massive patchset.
What follows are the 3 commits which existed discretely in the original
submissions. Upon rebasing on Broadwell support, it became clear that
separation was not good, and only made for more error prone code. Below
are the 3 commit messages with all their history.
drm/i915: Add bind/unbind object functions to VMA
drm/i915: Use the new vm [un]bind functions
drm/i915: reduce vm->insert_entries() usage
drm/i915: Add bind/unbind object functions to VMA
As we plumb the code with more VM information, it has become more
obvious that the easiest way to deal with bind and unbind is to simply
put the function pointers in the vm, and let those choose the correct
way to handle the page table updates. This change allows many places in
the code to simply be vm->bind, and not have to worry about
distinguishing PPGTT vs GGTT.
Notice that this patch has no impact on functionality. I've decided to
save the actual change until the next patch because I think it's easier
to review that way. I'm happy to squash the two, or let Daniel do it on
merge.
v2:
Make ggtt handle the quirky aliasing ppgtt
Add flags to bind object to support above
Don't ever call bind/unbind directly for PPGTT until we have real, full
PPGTT (use NULLs to assert this)
Make sure we rebind the ggtt if there already is a ggtt binding. This
happens on set cache levels.
Use VMA for bind/unbind (Daniel, Ben)
v3: Reorganize ggtt_vma_bind to be more concise and easier to read
(Ville). Change logic in unbind to only unbind ggtt when there is a
global mapping, and to remove a redundant check if the aliasing ppgtt
exists.
v4: Make the bind function a bit smarter about the cache levels to avoid
unnecessary multiple remaps. "I accept it is a wart, I think unifying
the pin_vma / bind_vma could be unified later" (Chris)
Removed the git notes, and put version info here. (Daniel)
v5: Update the comment to not suck (Chris)
v6:
Move bind/unbind to the VMA. It makes more sense in the VMA structure
(always has, but I was previously lazy). With this change, it will allow
us to keep a distinct insert_entries.
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
drm/i915: Use the new vm [un]bind functions
Building on the last patch which created the new function pointers in
the VM for bind/unbind, here we actually put those new function pointers
to use.
Split out as a separate patch to aid in review. I'm fine with squashing
into the previous patch if people request it.
v2: Updated to address the smart ggtt which can do aliasing as needed
Make sure we bind to global gtt when mappable and fenceable. I thought
we could get away without this initialy, but we cannot.
v3: Make the global GTT binding explicitly use the ggtt VM for
bind_vma(). While at it, use the new ggtt_vma helper (Chris)
At this point the original mailing list thread diverges. ie.
v4^:
use target_obj instead of obj for gen6 relocate_entry
vma->bind_vma() can be called safely during pin. So simply do that
instead of the complicated conditionals.
Don't restore PPGTT bound objects on resume path
Bug fix in resume path for globally bound Bos
Properly handle secure dispatch
Rebased on vma bind/unbind conversion
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
drm/i915: reduce vm->insert_entries() usage
FKA: drm/i915: eliminate vm->insert_entries()
With bind/unbind function pointers in place, we no longer need
insert_entries. We could, and want to, remove clear_range; however, it's
not totally easy at this point, since it's still used in a couple of
places that don't deal only in objects: setup, ppgtt init, and restore
gtt mappings.
v2: Don't actually remove insert_entries, just limit its usage. It will
be useful when we introduce gen8. It will always be called from the vma
bind/unbind.
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> (v1)
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2013-12-07 06:10:56 +08:00
|
|
|
struct drm_i915_gem_object *obj = vma->obj;
|
2015-04-24 20:09:03 +08:00
|
|
|
const uint64_t size = min_t(uint64_t,
|
|
|
|
obj->base.size,
|
|
|
|
vma->node.size);
|
drm/i915: Create bind/unbind abstraction for VMAs
To sum up what goes on here, we abstract the vma binding, similarly to
the previous object binding. This helps for distinguishing legacy
binding, versus modern binding. To keep the code churn as minimal as
possible, I am leaving in insert_entries(). It serves as the per
platform pte writing basically. bind_vma and insert_entries do share a
lot of similarities, and I did have designs to combine the two, but as
mentioned already... too much churn in an already massive patchset.
What follows are the 3 commits which existed discretely in the original
submissions. Upon rebasing on Broadwell support, it became clear that
separation was not good, and only made for more error prone code. Below
are the 3 commit messages with all their history.
drm/i915: Add bind/unbind object functions to VMA
drm/i915: Use the new vm [un]bind functions
drm/i915: reduce vm->insert_entries() usage
drm/i915: Add bind/unbind object functions to VMA
As we plumb the code with more VM information, it has become more
obvious that the easiest way to deal with bind and unbind is to simply
put the function pointers in the vm, and let those choose the correct
way to handle the page table updates. This change allows many places in
the code to simply be vm->bind, and not have to worry about
distinguishing PPGTT vs GGTT.
Notice that this patch has no impact on functionality. I've decided to
save the actual change until the next patch because I think it's easier
to review that way. I'm happy to squash the two, or let Daniel do it on
merge.
v2:
Make ggtt handle the quirky aliasing ppgtt
Add flags to bind object to support above
Don't ever call bind/unbind directly for PPGTT until we have real, full
PPGTT (use NULLs to assert this)
Make sure we rebind the ggtt if there already is a ggtt binding. This
happens on set cache levels.
Use VMA for bind/unbind (Daniel, Ben)
v3: Reorganize ggtt_vma_bind to be more concise and easier to read
(Ville). Change logic in unbind to only unbind ggtt when there is a
global mapping, and to remove a redundant check if the aliasing ppgtt
exists.
v4: Make the bind function a bit smarter about the cache levels to avoid
unnecessary multiple remaps. "I accept it is a wart, I think unifying
the pin_vma / bind_vma could be unified later" (Chris)
Removed the git notes, and put version info here. (Daniel)
v5: Update the comment to not suck (Chris)
v6:
Move bind/unbind to the VMA. It makes more sense in the VMA structure
(always has, but I was previously lazy). With this change, it will allow
us to keep a distinct insert_entries.
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
drm/i915: Use the new vm [un]bind functions
Building on the last patch which created the new function pointers in
the VM for bind/unbind, here we actually put those new function pointers
to use.
Split out as a separate patch to aid in review. I'm fine with squashing
into the previous patch if people request it.
v2: Updated to address the smart ggtt which can do aliasing as needed
Make sure we bind to global gtt when mappable and fenceable. I thought
we could get away without this initialy, but we cannot.
v3: Make the global GTT binding explicitly use the ggtt VM for
bind_vma(). While at it, use the new ggtt_vma helper (Chris)
At this point the original mailing list thread diverges. ie.
v4^:
use target_obj instead of obj for gen6 relocate_entry
vma->bind_vma() can be called safely during pin. So simply do that
instead of the complicated conditionals.
Don't restore PPGTT bound objects on resume path
Bug fix in resume path for globally bound Bos
Properly handle secure dispatch
Rebased on vma bind/unbind conversion
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
drm/i915: reduce vm->insert_entries() usage
FKA: drm/i915: eliminate vm->insert_entries()
With bind/unbind function pointers in place, we no longer need
insert_entries. We could, and want to, remove clear_range; however, it's
not totally easy at this point, since it's still used in a couple of
places that don't only deal in objects: setup, ppgtt init, and restore
gtt mappings.
v2: Don't actually remove insert_entries, just limit its usage. It will
be useful when we introduce gen8. It will always be called from the vma
bind/unbind.
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> (v1)
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2013-12-07 06:10:56 +08:00
|
|
|
|
2014-10-24 19:42:33 +08:00
|
|
|
if (vma->bound & GLOBAL_BIND) {
|
2014-02-21 03:50:33 +08:00
|
|
|
vma->vm->clear_range(vma->vm,
|
|
|
|
vma->node.start,
|
2015-04-24 20:09:03 +08:00
|
|
|
size,
|
drm/i915: Create bind/unbind abstraction for VMAs
To sum up what goes on here, we abstract the vma binding, similarly to
the previous object binding. This helps for distinguishing legacy
binding, versus modern binding. To keep the code churn as minimal as
possible, I am leaving in insert_entries(). It serves as the per
platform pte writing basically. bind_vma and insert_entries do share a
lot of similarities, and I did have designs to combine the two, but as
mentioned already... too much churn in an already massive patchset.
What follows are the 3 commits which existed discretely in the original
submissions. Upon rebasing on Broadwell support, it became clear that
separation was not good, and only made for more error prone code. Below
are the 3 commit messages with all their history.
drm/i915: Add bind/unbind object functions to VMA
drm/i915: Use the new vm [un]bind functions
drm/i915: reduce vm->insert_entries() usage
drm/i915: Add bind/unbind object functions to VMA
As we plumb the code with more VM information, it has become more
obvious that the easiest way to deal with bind and unbind is to simply
put the function pointers in the vm, and let those choose the correct
way to handle the page table updates. This change allows many places in
the code to simply be vm->bind, and not have to worry about
distinguishing PPGTT vs GGTT.
Notice that this patch has no impact on functionality. I've decided to
save the actual change until the next patch because I think it's easier
to review that way. I'm happy to squash the two, or let Daniel do it on
merge.
v2:
Make ggtt handle the quirky aliasing ppgtt
Add flags to bind object to support above
Don't ever call bind/unbind directly for PPGTT until we have real, full
PPGTT (use NULLs to assert this)
Make sure we rebind the ggtt if there already is a ggtt binding. This
happens on set cache levels.
Use VMA for bind/unbind (Daniel, Ben)
v3: Reorganize ggtt_vma_bind to be more concise and easier to read
(Ville). Change logic in unbind to only unbind ggtt when there is a
global mapping, and to remove a redundant check if the aliasing ppgtt
exists.
v4: Make the bind function a bit smarter about the cache levels to avoid
unnecessary multiple remaps. "I accept it is a wart, I think unifying
the pin_vma / bind_vma could be unified later" (Chris)
Removed the git notes, and put version info here. (Daniel)
v5: Update the comment to not suck (Chris)
v6:
Move bind/unbind to the VMA. It makes more sense in the VMA structure
(always has, but I was previously lazy). With this change, it will allow
us to keep a distinct insert_entries.
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
drm/i915: Use the new vm [un]bind functions
Building on the last patch which created the new function pointers in
the VM for bind/unbind, here we actually put those new function pointers
to use.
Split out as a separate patch to aid in review. I'm fine with squashing
into the previous patch if people request it.
v2: Updated to address the smart ggtt which can do aliasing as needed
Make sure we bind to global gtt when mappable and fenceable. I thought
we could get away without this initially, but we cannot.
v3: Make the global GTT binding explicitly use the ggtt VM for
bind_vma(). While at it, use the new ggtt_vma helper (Chris)
At this point the original mailing list thread diverges. ie.
v4^:
use target_obj instead of obj for gen6 relocate_entry
vma->bind_vma() can be called safely during pin. So simply do that
instead of the complicated conditionals.
Don't restore PPGTT bound objects on resume path
Bug fix in resume path for globally bound Bos
Properly handle secure dispatch
Rebased on vma bind/unbind conversion
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
drm/i915: reduce vm->insert_entries() usage
FKA: drm/i915: eliminate vm->insert_entries()
With bind/unbind function pointers in place, we no longer need
insert_entries. We could, and want to, remove clear_range; however, it's
not totally easy at this point, since it's still used in a couple of
places that don't only deal in objects: setup, ppgtt init, and restore
gtt mappings.
v2: Don't actually remove insert_entries, just limit its usage. It will
be useful when we introduce gen8. It will always be called from the vma
bind/unbind.
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> (v1)
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2013-12-07 06:10:56 +08:00
|
|
|
true);
|
|
|
|
}
|
2012-02-16 06:50:22 +08:00
|
|
|
|
2015-04-21 00:04:05 +08:00
|
|
|
if (dev_priv->mm.aliasing_ppgtt && vma->bound & LOCAL_BIND) {
|
drm/i915: Create bind/unbind abstraction for VMAs
To sum up what goes on here, we abstract the vma binding, similarly to
the previous object binding. This helps for distinguishing legacy
binding, versus modern binding. To keep the code churn as minimal as
possible, I am leaving in insert_entries(). It serves as the per
platform pte writing basically. bind_vma and insert_entries do share a
lot of similarities, and I did have designs to combine the two, but as
mentioned already... too much churn in an already massive patchset.
What follows are the 3 commits which existed discretely in the original
submissions. Upon rebasing on Broadwell support, it became clear that
separation was not good, and only made for more error prone code. Below
are the 3 commit messages with all their history.
drm/i915: Add bind/unbind object functions to VMA
drm/i915: Use the new vm [un]bind functions
drm/i915: reduce vm->insert_entries() usage
drm/i915: Add bind/unbind object functions to VMA
As we plumb the code with more VM information, it has become more
obvious that the easiest way to deal with bind and unbind is to simply
put the function pointers in the vm, and let those choose the correct
way to handle the page table updates. This change allows many places in
the code to simply be vm->bind, and not have to worry about
distinguishing PPGTT vs GGTT.
Notice that this patch has no impact on functionality. I've decided to
save the actual change until the next patch because I think it's easier
to review that way. I'm happy to squash the two, or let Daniel do it on
merge.
v2:
Make ggtt handle the quirky aliasing ppgtt
Add flags to bind object to support above
Don't ever call bind/unbind directly for PPGTT until we have real, full
PPGTT (use NULLs to assert this)
Make sure we rebind the ggtt if there already is a ggtt binding. This
happens on set cache levels.
Use VMA for bind/unbind (Daniel, Ben)
v3: Reorganize ggtt_vma_bind to be more concise and easier to read
(Ville). Change logic in unbind to only unbind ggtt when there is a
global mapping, and to remove a redundant check if the aliasing ppgtt
exists.
v4: Make the bind function a bit smarter about the cache levels to avoid
unnecessary multiple remaps. "I accept it is a wart, I think unifying
the pin_vma / bind_vma could be unified later" (Chris)
Removed the git notes, and put version info here. (Daniel)
v5: Update the comment to not suck (Chris)
v6:
Move bind/unbind to the VMA. It makes more sense in the VMA structure
(always has, but I was previously lazy). With this change, it will allow
us to keep a distinct insert_entries.
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
drm/i915: Use the new vm [un]bind functions
Building on the last patch which created the new function pointers in
the VM for bind/unbind, here we actually put those new function pointers
to use.
Split out as a separate patch to aid in review. I'm fine with squashing
into the previous patch if people request it.
v2: Updated to address the smart ggtt which can do aliasing as needed
Make sure we bind to global gtt when mappable and fenceable. I thought
we could get away without this initially, but we cannot.
v3: Make the global GTT binding explicitly use the ggtt VM for
bind_vma(). While at it, use the new ggtt_vma helper (Chris)
At this point the original mailing list thread diverges. ie.
v4^:
use target_obj instead of obj for gen6 relocate_entry
vma->bind_vma() can be called safely during pin. So simply do that
instead of the complicated conditionals.
Don't restore PPGTT bound objects on resume path
Bug fix in resume path for globally bound Bos
Properly handle secure dispatch
Rebased on vma bind/unbind conversion
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
drm/i915: reduce vm->insert_entries() usage
FKA: drm/i915: eliminate vm->insert_entries()
With bind/unbind function pointers in place, we no longer need
insert_entries. We could, and want to, remove clear_range; however, it's
not totally easy at this point, since it's still used in a couple of
places that don't only deal in objects: setup, ppgtt init, and restore
gtt mappings.
v2: Don't actually remove insert_entries, just limit its usage. It will
be useful when we introduce gen8. It will always be called from the vma
bind/unbind.
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> (v1)
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2013-12-07 06:10:56 +08:00
|
|
|
struct i915_hw_ppgtt *appgtt = dev_priv->mm.aliasing_ppgtt;
|
2015-04-24 20:09:03 +08:00
|
|
|
|
drm/i915: Create bind/unbind abstraction for VMAs
To sum up what goes on here, we abstract the vma binding, similarly to
the previous object binding. This helps for distinguishing legacy
binding, versus modern binding. To keep the code churn as minimal as
possible, I am leaving in insert_entries(). It serves as the per
platform pte writing basically. bind_vma and insert_entries do share a
lot of similarities, and I did have designs to combine the two, but as
mentioned already... too much churn in an already massive patchset.
What follows are the 3 commits which existed discretely in the original
submissions. Upon rebasing on Broadwell support, it became clear that
separation was not good, and only made for more error prone code. Below
are the 3 commit messages with all their history.
drm/i915: Add bind/unbind object functions to VMA
drm/i915: Use the new vm [un]bind functions
drm/i915: reduce vm->insert_entries() usage
drm/i915: Add bind/unbind object functions to VMA
As we plumb the code with more VM information, it has become more
obvious that the easiest way to deal with bind and unbind is to simply
put the function pointers in the vm, and let those choose the correct
way to handle the page table updates. This change allows many places in
the code to simply be vm->bind, and not have to worry about
distinguishing PPGTT vs GGTT.
Notice that this patch has no impact on functionality. I've decided to
save the actual change until the next patch because I think it's easier
to review that way. I'm happy to squash the two, or let Daniel do it on
merge.
v2:
Make ggtt handle the quirky aliasing ppgtt
Add flags to bind object to support above
Don't ever call bind/unbind directly for PPGTT until we have real, full
PPGTT (use NULLs to assert this)
Make sure we rebind the ggtt if there already is a ggtt binding. This
happens on set cache levels.
Use VMA for bind/unbind (Daniel, Ben)
v3: Reorganize ggtt_vma_bind to be more concise and easier to read
(Ville). Change logic in unbind to only unbind ggtt when there is a
global mapping, and to remove a redundant check if the aliasing ppgtt
exists.
v4: Make the bind function a bit smarter about the cache levels to avoid
unnecessary multiple remaps. "I accept it is a wart, I think unifying
the pin_vma / bind_vma could be unified later" (Chris)
Removed the git notes, and put version info here. (Daniel)
v5: Update the comment to not suck (Chris)
v6:
Move bind/unbind to the VMA. It makes more sense in the VMA structure
(always has, but I was previously lazy). With this change, it will allow
us to keep a distinct insert_entries.
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
drm/i915: Use the new vm [un]bind functions
Building on the last patch which created the new function pointers in
the VM for bind/unbind, here we actually put those new function pointers
to use.
Split out as a separate patch to aid in review. I'm fine with squashing
into the previous patch if people request it.
v2: Updated to address the smart ggtt which can do aliasing as needed
Make sure we bind to global gtt when mappable and fenceable. I thought
we could get away without this initialy, but we cannot.
v3: Make the global GTT binding explicitly use the ggtt VM for
bind_vma(). While at it, use the new ggtt_vma helper (Chris)
At this point the original mailing list thread diverges. ie.
v4^:
use target_obj instead of obj for gen6 relocate_entry
vma->bind_vma() can be called safely during pin. So simply do that
instead of the complicated conditionals.
Don't restore PPGTT bound objects on resume path
Bug fix in resume path for globally bound Bos
Properly handle secure dispatch
Rebased on vma bind/unbind conversion
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
drm/i915: reduce vm->insert_entries() usage
FKA: drm/i915: eliminate vm->insert_entries()
With bind/unbind function pointers in place, we no longer need
insert_entries. We could, and want to, remove clear_range; however, it's
not totally easy at this point, since it's still used in a couple of
places that don't only deal in objects: setup, ppgtt init, and restore
gtt mappings.
v2: Don't actually remove insert_entries, just limit its usage. It will
be useful when we introduce gen8. It will always be called from the vma
bind/unbind.
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> (v1)
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2013-12-07 06:10:56 +08:00
|
|
|
appgtt->base.clear_range(&appgtt->base,
|
2014-02-21 03:50:33 +08:00
|
|
|
vma->node.start,
|
2015-04-24 20:09:03 +08:00
|
|
|
size,
|
drm/i915: Create bind/unbind abstraction for VMAs
To sum up what goes on here, we abstract the vma binding, similarly to
the previous object binding. This helps for distinguishing legacy
binding, versus modern binding. To keep the code churn as minimal as
possible, I am leaving in insert_entries(). It serves as the per
platform pte writing basically. bind_vma and insert_entries do share a
lot of similarities, and I did have designs to combine the two, but as
mentioned already... too much churn in an already massive patchset.
What follows are the 3 commits which existed discretely in the original
submissions. Upon rebasing on Broadwell support, it became clear that
separation was not good, and only made for more error prone code. Below
are the 3 commit messages with all their history.
drm/i915: Add bind/unbind object functions to VMA
drm/i915: Use the new vm [un]bind functions
drm/i915: reduce vm->insert_entries() usage
drm/i915: Add bind/unbind object functions to VMA
As we plumb the code with more VM information, it has become more
obvious that the easiest way to deal with bind and unbind is to simply
put the function pointers in the vm, and let those choose the correct
way to handle the page table updates. This change allows many places in
the code to simply be vm->bind, and not have to worry about
distinguishing PPGTT vs GGTT.
Notice that this patch has no impact on functionality. I've decided to
save the actual change until the next patch because I think it's easier
to review that way. I'm happy to squash the two, or let Daniel do it on
merge.
v2:
Make ggtt handle the quirky aliasing ppgtt
Add flags to bind object to support above
Don't ever call bind/unbind directly for PPGTT until we have real, full
PPGTT (use NULLs to assert this)
Make sure we rebind the ggtt if there already is a ggtt binding. This
happens on set cache levels.
Use VMA for bind/unbind (Daniel, Ben)
v3: Reorganize ggtt_vma_bind to be more concise and easier to read
(Ville). Change logic in unbind to only unbind ggtt when there is a
global mapping, and to remove a redundant check if the aliasing ppgtt
exists.
v4: Make the bind function a bit smarter about the cache levels to avoid
unnecessary multiple remaps. "I accept it is a wart, I think unifying
the pin_vma / bind_vma could be unified later" (Chris)
Removed the git notes, and put version info here. (Daniel)
v5: Update the comment to not suck (Chris)
v6:
Move bind/unbind to the VMA. It makes more sense in the VMA structure
(always has, but I was previously lazy). With this change, it will allow
us to keep a distinct insert_entries.
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
drm/i915: Use the new vm [un]bind functions
Building on the last patch which created the new function pointers in
the VM for bind/unbind, here we actually put those new function pointers
to use.
Split out as a separate patch to aid in review. I'm fine with squashing
into the previous patch if people request it.
v2: Updated to address the smart ggtt which can do aliasing as needed
Make sure we bind to global gtt when mappable and fenceable. I thought
we could get away without this initially, but we cannot.
v3: Make the global GTT binding explicitly use the ggtt VM for
bind_vma(). While at it, use the new ggtt_vma helper (Chris)
At this point the original mailing list thread diverges. ie.
v4^:
use target_obj instead of obj for gen6 relocate_entry
vma->bind_vma() can be called safely during pin. So simply do that
instead of the complicated conditionals.
Don't restore PPGTT bound objects on resume path
Bug fix in resume path for globally bound Bos
Properly handle secure dispatch
Rebased on vma bind/unbind conversion
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
drm/i915: reduce vm->insert_entries() usage
FKA: drm/i915: eliminate vm->insert_entries()
With bind/unbind function pointers in place, we no longer need
insert_entries. We could, and want, to remove clear_range, however it's
not totally easy at this point. Since it's used in a couple of place
still that don't only deal in objects: setup, ppgtt init, and restore
gtt mappings.
v2: Don't actually remove insert_entries, just limit its usage. It will
be useful when we introduce gen8. It will always be called from the vma
bind/unbind.
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> (v1)
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2013-12-07 06:10:56 +08:00
|
|
|
true);
|
|
|
|
}
|
2012-02-16 06:50:21 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
void i915_gem_gtt_finish_object(struct drm_i915_gem_object *obj)
|
2010-11-06 17:10:47 +08:00
|
|
|
{
|
2011-10-18 06:51:55 +08:00
|
|
|
struct drm_device *dev = obj->base.dev;
|
|
|
|
struct drm_i915_private *dev_priv = dev->dev_private;
|
|
|
|
bool interruptible;
|
|
|
|
|
|
|
|
interruptible = do_idling(dev_priv);
|
|
|
|
|
2012-06-01 22:20:22 +08:00
|
|
|
if (!obj->has_dma_mapping)
|
|
|
|
dma_unmap_sg(&dev->pdev->dev,
|
|
|
|
obj->pages->sgl, obj->pages->nents,
|
|
|
|
PCI_DMA_BIDIRECTIONAL);
|
2011-10-18 06:51:55 +08:00
|
|
|
|
|
|
|
undo_idling(dev_priv, interruptible);
|
2010-11-06 17:10:47 +08:00
|
|
|
}
|
2012-03-26 15:45:40 +08:00
|
|
|
|
2012-07-26 18:49:32 +08:00
|
|
|
static void i915_gtt_color_adjust(struct drm_mm_node *node,
|
|
|
|
unsigned long color,
|
2015-01-23 16:05:06 +08:00
|
|
|
u64 *start,
|
|
|
|
u64 *end)
|
2012-07-26 18:49:32 +08:00
|
|
|
{
|
|
|
|
if (node->color != color)
|
|
|
|
*start += 4096;
|
|
|
|
|
|
|
|
if (!list_empty(&node->node_list)) {
|
|
|
|
node = list_entry(node->node_list.next,
|
|
|
|
struct drm_mm_node,
|
|
|
|
node_list);
|
|
|
|
if (node->allocated && node->color != color)
|
|
|
|
*end -= 4096;
|
|
|
|
}
|
|
|
|
}
|
2013-11-05 11:56:49 +08:00
|
|
|
|
2014-11-20 04:40:13 +08:00
|
|
|
static int i915_gem_setup_global_gtt(struct drm_device *dev,
|
|
|
|
unsigned long start,
|
|
|
|
unsigned long mappable_end,
|
|
|
|
unsigned long end)
|
2012-03-26 15:45:40 +08:00
|
|
|
{
|
2013-01-26 08:41:04 +08:00
|
|
|
/* Let GEM Manage all of the aperture.
|
|
|
|
*
|
|
|
|
* However, leave one page at the end still bound to the scratch page.
|
|
|
|
* There are a number of places where the hardware apparently prefetches
|
|
|
|
* past the end of the object, and we've seen multiple hangs with the
|
|
|
|
* GPU head pointer stuck in a batchbuffer bound at the last page of the
|
|
|
|
* aperture. One page should be enough to keep any prefetching inside
|
|
|
|
* of the aperture.
|
|
|
|
*/
|
2013-08-01 07:59:59 +08:00
|
|
|
struct drm_i915_private *dev_priv = dev->dev_private;
|
|
|
|
struct i915_address_space *ggtt_vm = &dev_priv->gtt.base;
|
2012-11-15 19:32:19 +08:00
|
|
|
struct drm_mm_node *entry;
|
|
|
|
struct drm_i915_gem_object *obj;
|
|
|
|
unsigned long hole_start, hole_end;
|
2014-08-07 02:19:54 +08:00
|
|
|
int ret;
|
2012-03-26 15:45:40 +08:00
|
|
|
|
2013-01-18 04:45:13 +08:00
|
|
|
BUG_ON(mappable_end > end);
|
|
|
|
|
2012-11-15 19:32:19 +08:00
|
|
|
/* Subtract the guard page ... */
|
2013-08-01 07:59:59 +08:00
|
|
|
drm_mm_init(&ggtt_vm->mm, start, end - start - PAGE_SIZE);
|
2015-02-10 19:05:48 +08:00
|
|
|
|
|
|
|
dev_priv->gtt.base.start = start;
|
|
|
|
dev_priv->gtt.base.total = end - start;
|
|
|
|
|
|
|
|
if (intel_vgpu_active(dev)) {
|
|
|
|
ret = intel_vgt_balloon(dev);
|
|
|
|
if (ret)
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2012-07-26 18:49:32 +08:00
|
|
|
if (!HAS_LLC(dev))
|
2013-07-17 07:50:06 +08:00
|
|
|
dev_priv->gtt.base.mm.color_adjust = i915_gtt_color_adjust;
|
2012-03-26 15:45:40 +08:00
|
|
|
|
2012-11-15 19:32:19 +08:00
|
|
|
/* Mark any preallocated objects as occupied */
|
2013-06-01 02:28:48 +08:00
|
|
|
list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
|
2013-08-01 07:59:59 +08:00
|
|
|
struct i915_vma *vma = i915_gem_obj_to_vma(obj, ggtt_vm);
|
2014-08-07 02:19:54 +08:00
|
|
|
|
2013-07-06 05:41:05 +08:00
|
|
|
DRM_DEBUG_KMS("reserving preallocated space: %lx + %zx\n",
|
2013-07-06 05:41:06 +08:00
|
|
|
i915_gem_obj_ggtt_offset(obj), obj->base.size);
|
|
|
|
|
|
|
|
WARN_ON(i915_gem_obj_ggtt_bound(obj));
|
2013-08-01 07:59:59 +08:00
|
|
|
ret = drm_mm_reserve_node(&ggtt_vm->mm, &vma->node);
|
2014-08-06 21:04:50 +08:00
|
|
|
if (ret) {
|
|
|
|
DRM_DEBUG_KMS("Reservation failed: %i\n", ret);
|
|
|
|
return ret;
|
|
|
|
}
|
2014-10-24 19:42:33 +08:00
|
|
|
vma->bound |= GLOBAL_BIND;
|
2012-11-15 19:32:19 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/* Clear any non-preallocated blocks */
|
2013-08-01 07:59:59 +08:00
|
|
|
drm_mm_for_each_hole(entry, &ggtt_vm->mm, hole_start, hole_end) {
|
2012-11-15 19:32:19 +08:00
|
|
|
DRM_DEBUG_KMS("clearing unused GTT space: [%lx, %lx]\n",
|
|
|
|
hole_start, hole_end);
|
2014-02-21 03:50:33 +08:00
|
|
|
ggtt_vm->clear_range(ggtt_vm, hole_start,
|
|
|
|
hole_end - hole_start, true);
|
2012-11-15 19:32:19 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/* And finally clear the reserved guard page */
|
2014-02-21 03:50:33 +08:00
|
|
|
ggtt_vm->clear_range(ggtt_vm, end - PAGE_SIZE, PAGE_SIZE, true);
|
2014-08-06 21:04:50 +08:00
|
|
|
|
2014-08-07 02:19:54 +08:00
|
|
|
if (USES_PPGTT(dev) && !USES_FULL_PPGTT(dev)) {
|
|
|
|
struct i915_hw_ppgtt *ppgtt;
|
|
|
|
|
|
|
|
ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL);
|
|
|
|
if (!ppgtt)
|
|
|
|
return -ENOMEM;
|
|
|
|
|
2015-04-14 23:35:14 +08:00
|
|
|
ret = __hw_ppgtt_init(dev, ppgtt);
|
|
|
|
if (ret) {
|
|
|
|
ppgtt->base.cleanup(&ppgtt->base);
|
|
|
|
kfree(ppgtt);
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (ppgtt->base.allocate_va_range)
|
|
|
|
ret = ppgtt->base.allocate_va_range(&ppgtt->base, 0,
|
|
|
|
ppgtt->base.total);
|
drm/i915: Finish gen6/7 dynamic page table allocation
This patch continues on the idea from "Track GEN6 page table usage".
From here on, in the steady state, PDEs are all pointing to the scratch
page table (as recommended in the spec). When an object is allocated in
the VA range, the code will determine if we need to allocate a page for
the page table. Similarly when the object is destroyed, we will remove,
and free the page table pointing the PDE back to the scratch page.
Following patches will work to unify the code a bit as we bring in GEN8
support. GEN6 and GEN8 are different enough that I had a hard time to
get to this point with as much common code as I do.
The aliasing PPGTT must pre-allocate all of the page tables. There are a
few reasons for this. Two trivial ones: aliasing ppgtt goes through the
ggtt paths, so it's hard to maintain, we currently do not restore the
default context (assuming the previous force reload is indeed
necessary). Most importantly though, the only way (it seems from
empirical evidence) to invalidate the CS TLBs on non-render ring is to
either use ring sync (which requires actually stopping the rings in
order to synchronize when the sync completes vs. where you are in
execution), or to reload DCLV. Since without full PPGTT we do not ever
reload the DCLV register, there is no good way to achieve this. The
simplest solution is just to not support dynamic page table
creation/destruction in the aliasing PPGTT.
We could always reload DCLV, but this seems like quite a bit of excess
overhead only to save at most 2MB-4k of memory for the aliasing PPGTT
page tables.
v2: Make the page table bitmap declared inside the function (Chris)
Simplify the way scratching address space works.
Move the alloc/teardown tracepoints up a level in the call stack so that
both all implementations get the trace.
v3: Updated trace event to spit out a name
v4: Aliasing ppgtt is now initialized differently (in setup global gtt)
v5: Rebase to latest code. Also removed unnecessary aliasing ppgtt check
for trace, as it is no longer possible after the PPGTT cleanup patch series
of a couple of months ago (Daniel).
v6: Implement changes from code review (Daniel):
- allocate/teardown_va_range calls added.
- Add a scratch page allocation helper (only need the address).
- Move trace events to a new patch.
- Use updated mark_tlbs_dirty.
- Moved pt preallocation for aliasing ppgtt into gen6_ppgtt_init.
v7: teardown_va_range removed (Daniel).
In init, gen6_ppgtt_clear_range call is only needed for aliasing ppgtt.
v8: Rebase after s/page_tables/page_table/.
v9: Remove unnecessary scratch flag in page_table struct, future patches
can just compare against ppgtt->scratch_pt, and alloc_pt_scratch becomes
redundant. Initialize scratch_pt and pt. (Mika)
v10: Clean up aliasing ppgtt init error path and prevent leaking the
ppgtt obj when init fails. (Mika)
Updated commit author. (Daniel)
Cc: Mika Kuoppala <mika.kuoppala@intel.com>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Michel Thierry <michel.thierry@intel.com> (v4+)
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2015-03-24 23:46:22 +08:00
|
|
|
if (ret) {
|
2015-04-14 23:35:13 +08:00
|
|
|
ppgtt->base.cleanup(&ppgtt->base);
|
drm/i915: Finish gen6/7 dynamic page table allocation
This patch continues on the idea from "Track GEN6 page table usage".
From here on, in the steady state, PDEs are all pointing to the scratch
page table (as recommended in the spec). When an object is allocated in
the VA range, the code will determine if we need to allocate a page for
the page table. Similarly when the object is destroyed, we will remove,
and free the page table pointing the PDE back to the scratch page.
Following patches will work to unify the code a bit as we bring in GEN8
support. GEN6 and GEN8 are different enough that I had a hard time to
get to this point with as much common code as I do.
The aliasing PPGTT must pre-allocate all of the page tables. There are a
few reasons for this. Two trivial ones: aliasing ppgtt goes through the
ggtt paths, so it's hard to maintain, we currently do not restore the
default context (assuming the previous force reload is indeed
necessary). Most importantly though, the only way (it seems from
empirical evidence) to invalidate the CS TLBs on non-render ring is to
either use ring sync (which requires actually stopping the rings in
order to synchronize when the sync completes vs. where you are in
execution), or to reload DCLV. Since without full PPGTT we do not ever
reload the DCLV register, there is no good way to achieve this. The
simplest solution is just to not support dynamic page table
creation/destruction in the aliasing PPGTT.
We could always reload DCLV, but this seems like quite a bit of excess
overhead only to save at most 2MB-4k of memory for the aliasing PPGTT
page tables.
v2: Make the page table bitmap declared inside the function (Chris)
Simplify the way scratching address space works.
Move the alloc/teardown tracepoints up a level in the call stack so that
both all implementations get the trace.
v3: Updated trace event to spit out a name
v4: Aliasing ppgtt is now initialized differently (in setup global gtt)
v5: Rebase to latest code. Also removed unnecessary aliasing ppgtt check
for trace, as it is no longer possible after the PPGTT cleanup patch series
of a couple of months ago (Daniel).
v6: Implement changes from code review (Daniel):
- allocate/teardown_va_range calls added.
- Add a scratch page allocation helper (only need the address).
- Move trace events to a new patch.
- Use updated mark_tlbs_dirty.
- Moved pt preallocation for aliasing ppgtt into gen6_ppgtt_init.
v7: teardown_va_range removed (Daniel).
In init, gen6_ppgtt_clear_range call is only needed for aliasing ppgtt.
v8: Rebase after s/page_tables/page_table/.
v9: Remove unnecessary scratch flag in page_table struct, future patches
can just compare against ppgtt->scratch_pt, and alloc_pt_scratch becomes
redundant. Initialize scratch_pt and pt. (Mika)
v10: Clean up aliasing ppgtt init error path and prevent leaking the
ppgtt obj when init fails. (Mika)
Updated commit author. (Daniel)
Cc: Mika Kuoppala <mika.kuoppala@intel.com>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Michel Thierry <michel.thierry@intel.com> (v4+)
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2015-03-24 23:46:22 +08:00
|
|
|
kfree(ppgtt);
|
2014-08-07 02:19:54 +08:00
|
|
|
return ret;
|
drm/i915: Finish gen6/7 dynamic page table allocation
This patch continues on the idea from "Track GEN6 page table usage".
From here on, in the steady state, PDEs are all pointing to the scratch
page table (as recommended in the spec). When an object is allocated in
the VA range, the code will determine if we need to allocate a page for
the page table. Similarly when the object is destroyed, we will remove,
and free the page table pointing the PDE back to the scratch page.
Following patches will work to unify the code a bit as we bring in GEN8
support. GEN6 and GEN8 are different enough that I had a hard time to
get to this point with as much common code as I do.
The aliasing PPGTT must pre-allocate all of the page tables. There are a
few reasons for this. Two trivial ones: aliasing ppgtt goes through the
ggtt paths, so it's hard to maintain, we currently do not restore the
default context (assuming the previous force reload is indeed
necessary). Most importantly though, the only way (it seems from
empirical evidence) to invalidate the CS TLBs on non-render ring is to
either use ring sync (which requires actually stopping the rings in
order to synchronize when the sync completes vs. where you are in
execution), or to reload DCLV. Since without full PPGTT we do not ever
reload the DCLV register, there is no good way to achieve this. The
simplest solution is just to not support dynamic page table
creation/destruction in the aliasing PPGTT.
We could always reload DCLV, but this seems like quite a bit of excess
overhead only to save at most 2MB-4k of memory for the aliasing PPGTT
page tables.
v2: Make the page table bitmap declared inside the function (Chris)
Simplify the way scratching address space works.
Move the alloc/teardown tracepoints up a level in the call stack so that
both all implementations get the trace.
v3: Updated trace event to spit out a name
v4: Aliasing ppgtt is now initialized differently (in setup global gtt)
v5: Rebase to latest code. Also removed unnecessary aliasing ppgtt check
for trace, as it is no longer possible after the PPGTT cleanup patch series
of a couple of months ago (Daniel).
v6: Implement changes from code review (Daniel):
- allocate/teardown_va_range calls added.
- Add a scratch page allocation helper (only need the address).
- Move trace events to a new patch.
- Use updated mark_tlbs_dirty.
- Moved pt preallocation for aliasing ppgtt into gen6_ppgtt_init.
v7: teardown_va_range removed (Daniel).
In init, gen6_ppgtt_clear_range call is only needed for aliasing ppgtt.
v8: Rebase after s/page_tables/page_table/.
v9: Remove unnecessary scratch flag in page_table struct, future patches
can just compare against ppgtt->scratch_pt, and alloc_pt_scratch becomes
redundant. Initialize scratch_pt and pt. (Mika)
v10: Clean up aliasing ppgtt init error path and prevent leaking the
ppgtt obj when init fails. (Mika)
Updated commit author. (Daniel)
Cc: Mika Kuoppala <mika.kuoppala@intel.com>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Michel Thierry <michel.thierry@intel.com> (v4+)
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2015-03-24 23:46:22 +08:00
|
|
|
}
|
2014-08-07 02:19:54 +08:00
|
|
|
|
2015-04-14 23:35:14 +08:00
|
|
|
ppgtt->base.clear_range(&ppgtt->base,
|
|
|
|
ppgtt->base.start,
|
|
|
|
ppgtt->base.total,
|
|
|
|
true);
|
|
|
|
|
2014-08-07 02:19:54 +08:00
|
|
|
dev_priv->mm.aliasing_ppgtt = ppgtt;
|
|
|
|
}
|
|
|
|
|
2014-08-06 21:04:50 +08:00
|
|
|
return 0;
|
2012-11-05 01:21:27 +08:00
|
|
|
}
|
|
|
|
|
2012-12-19 02:31:25 +08:00
|
|
|
void i915_gem_init_global_gtt(struct drm_device *dev)
|
|
|
|
{
|
|
|
|
struct drm_i915_private *dev_priv = dev->dev_private;
|
|
|
|
unsigned long gtt_size, mappable_size;
|
|
|
|
|
2013-07-17 07:50:05 +08:00
|
|
|
gtt_size = dev_priv->gtt.base.total;
|
2013-01-18 04:45:17 +08:00
|
|
|
mappable_size = dev_priv->gtt.mappable_end;
|
2012-12-19 02:31:25 +08:00
|
|
|
|
2013-01-26 08:41:04 +08:00
|
|
|
i915_gem_setup_global_gtt(dev, 0, mappable_size, gtt_size);
|
2012-11-05 01:21:27 +08:00
|
|
|
}
|
|
|
|
|
2014-08-06 21:04:56 +08:00
|
|
|
void i915_global_gtt_cleanup(struct drm_device *dev)
|
|
|
|
{
|
|
|
|
struct drm_i915_private *dev_priv = dev->dev_private;
|
|
|
|
struct i915_address_space *vm = &dev_priv->gtt.base;
|
|
|
|
|
2014-08-06 21:04:57 +08:00
|
|
|
if (dev_priv->mm.aliasing_ppgtt) {
|
|
|
|
struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt;
|
|
|
|
|
|
|
|
ppgtt->base.cleanup(&ppgtt->base);
|
|
|
|
}
|
|
|
|
|
2014-08-06 21:04:56 +08:00
|
|
|
if (drm_mm_initialized(&vm->mm)) {
|
2015-02-10 19:05:48 +08:00
|
|
|
if (intel_vgpu_active(dev))
|
|
|
|
intel_vgt_deballoon();
|
|
|
|
|
2014-08-06 21:04:56 +08:00
|
|
|
drm_mm_takedown(&vm->mm);
|
|
|
|
list_del(&vm->global_link);
|
|
|
|
}
|
|
|
|
|
|
|
|
vm->cleanup(vm);
|
|
|
|
}
|
2014-08-06 21:04:57 +08:00
|
|
|
|
2012-11-05 01:21:27 +08:00
|
|
|
static int setup_scratch_page(struct drm_device *dev)
|
|
|
|
{
|
|
|
|
struct drm_i915_private *dev_priv = dev->dev_private;
|
|
|
|
struct page *page;
|
|
|
|
dma_addr_t dma_addr;
|
|
|
|
|
|
|
|
page = alloc_page(GFP_KERNEL | GFP_DMA32 | __GFP_ZERO);
|
|
|
|
if (page == NULL)
|
|
|
|
return -ENOMEM;
|
|
|
|
set_pages_uc(page, 1);
|
|
|
|
|
|
|
|
#ifdef CONFIG_INTEL_IOMMU
|
|
|
|
dma_addr = pci_map_page(dev->pdev, page, 0, PAGE_SIZE,
|
|
|
|
PCI_DMA_BIDIRECTIONAL);
|
|
|
|
if (pci_dma_mapping_error(dev->pdev, dma_addr))
|
|
|
|
return -EINVAL;
|
|
|
|
#else
|
|
|
|
dma_addr = page_to_phys(page);
|
|
|
|
#endif
|
2013-07-17 07:50:05 +08:00
|
|
|
dev_priv->gtt.base.scratch.page = page;
|
|
|
|
dev_priv->gtt.base.scratch.addr = dma_addr;
|
2012-11-05 01:21:27 +08:00
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void teardown_scratch_page(struct drm_device *dev)
|
|
|
|
{
|
|
|
|
struct drm_i915_private *dev_priv = dev->dev_private;
|
2013-07-17 07:50:05 +08:00
|
|
|
struct page *page = dev_priv->gtt.base.scratch.page;
|
|
|
|
|
|
|
|
set_pages_wb(page, 1);
|
|
|
|
pci_unmap_page(dev->pdev, dev_priv->gtt.base.scratch.addr,
|
2012-11-05 01:21:27 +08:00
|
|
|
PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
|
2013-07-17 07:50:05 +08:00
|
|
|
__free_page(page);
|
2012-11-05 01:21:27 +08:00
|
|
|
}
|
|
|
|
|
2015-04-14 23:35:26 +08:00
|
|
|
/*
 * Decode the GGMS field of the SNB GMCH control word.  The field value is
 * scaled by 1MB (<< 20) to give the returned size in bytes.
 */
static unsigned int gen6_get_total_gtt_size(u16 snb_gmch_ctl)
{
	const u16 ggms = (snb_gmch_ctl >> SNB_GMCH_GGMS_SHIFT) &
			 SNB_GMCH_GGMS_MASK;

	return ggms << 20;
}
|
|
|
|
|
2015-04-14 23:35:26 +08:00
|
|
|
/*
 * Decode the GGMS field of the BDW GMCH control word.  A non-zero field
 * value n selects a size of (1 << n) MB; zero stays zero.  The result is
 * returned in bytes.
 */
static unsigned int gen8_get_total_gtt_size(u16 bdw_gmch_ctl)
{
	const u16 ggms = (bdw_gmch_ctl >> BDW_GMCH_GGMS_SHIFT) &
			 BDW_GMCH_GGMS_MASK;
	u16 size_mb = 0;

	if (ggms)
		size_mb = 1 << ggms;

#ifdef CONFIG_X86_32
	/* Limit 32b platforms to a 2GB GGTT: 4 << 20 / pte size * PAGE_SIZE */
	if (size_mb > 4)
		size_mb = 4;
#endif

	return size_mb << 20;
}
|
|
|
|
|
2015-04-14 23:35:26 +08:00
|
|
|
/*
 * Decode the GGMS field (SNB layout) of the CHV GMCH control word.  A
 * non-zero field value n yields 1 << (20 + n) bytes; zero yields 0.
 */
static unsigned int chv_get_total_gtt_size(u16 gmch_ctrl)
{
	const u16 ggms = (gmch_ctrl >> SNB_GMCH_GGMS_SHIFT) &
			 SNB_GMCH_GGMS_MASK;

	if (!ggms)
		return 0;

	return 1 << (20 + ggms);
}
|
|
|
|
|
2015-04-14 23:35:26 +08:00
|
|
|
/*
 * Decode the GMS (stolen memory size) field of the SNB GMCH control word
 * and return the size in bytes.
 */
static size_t gen6_get_stolen_size(u16 snb_gmch_ctl)
{
	const u16 gms = (snb_gmch_ctl >> SNB_GMCH_GMS_SHIFT) &
			SNB_GMCH_GMS_MASK;

	/* 32 MB units */
	return gms << 25;
}
|
|
|
|
|
2015-04-14 23:35:26 +08:00
|
|
|
/*
 * Decode the GMS (stolen memory size) field of the BDW GMCH control word
 * and return the size in bytes.
 *
 * The field is widened to size_t before scaling: a u16 operand would be
 * promoted to int, and "<< 25" overflows a 32-bit int once the field value
 * reaches 0x40 (2GB).
 */
static size_t gen8_get_stolen_size(u16 bdw_gmch_ctl)
{
	bdw_gmch_ctl >>= BDW_GMCH_GMS_SHIFT;
	bdw_gmch_ctl &= BDW_GMCH_GMS_MASK;
	return (size_t)bdw_gmch_ctl << 25; /* 32 MB units */
}
|
|
|
|
|
2014-05-09 03:19:40 +08:00
|
|
|
/*
 * Decode the GMS (stolen memory size) field of the CHV GMCH control word
 * and return the size in bytes.
 *
 * Encoding:
 *   0x0  to 0x10: 32MB increments starting at 0MB
 *   0x11 to 0x16: 4MB increments starting at 8MB
 *   0x17 to 0x1d: 4MB increments starting at 36MB
 */
static size_t chv_get_stolen_size(u16 gmch_ctrl)
{
	const u16 gms = (gmch_ctrl >> SNB_GMCH_GMS_SHIFT) & SNB_GMCH_GMS_MASK;

	if (gms >= 0x17)
		return (gms - 0x17 + 9) << 22;

	if (gms >= 0x11)
		return (gms - 0x11 + 2) << 22;

	return gms << 25;
}
|
|
|
|
|
2014-01-10 02:02:46 +08:00
|
|
|
/*
 * Decode the GMS (stolen memory size) field of the gen9 GMCH control word
 * (BDW field layout) and return the size in bytes.
 *
 * Values below 0xf0 count 32MB units; values from 0xf0 upwards count 4MB
 * units starting at 4MB.  The arithmetic is done in size_t: with a u16
 * operand promoted to int, "<< 25" would overflow a 32-bit int for field
 * values of 0x40 and above, which the 32MB branch accepts up to 0xef.
 */
static size_t gen9_get_stolen_size(u16 gen9_gmch_ctl)
{
	gen9_gmch_ctl >>= BDW_GMCH_GMS_SHIFT;
	gen9_gmch_ctl &= BDW_GMCH_GMS_MASK;

	if (gen9_gmch_ctl < 0xf0)
		return (size_t)gen9_gmch_ctl << 25; /* 32 MB units */

	/* 4MB increments starting at 0xf0 for 4MB */
	return (size_t)(gen9_gmch_ctl - 0xf0 + 1) << 22;
}
|
|
|
|
|
2013-11-05 11:32:22 +08:00
|
|
|
static int ggtt_probe_common(struct drm_device *dev,
|
|
|
|
size_t gtt_size)
|
|
|
|
{
|
|
|
|
struct drm_i915_private *dev_priv = dev->dev_private;
|
2013-12-22 01:52:52 +08:00
|
|
|
phys_addr_t gtt_phys_addr;
|
2013-11-05 11:32:22 +08:00
|
|
|
int ret;
|
|
|
|
|
|
|
|
/* For Modern GENs the PTEs and register space are split in the BAR */
|
2013-12-22 01:52:52 +08:00
|
|
|
gtt_phys_addr = pci_resource_start(dev->pdev, 0) +
|
2013-11-05 11:32:22 +08:00
|
|
|
(pci_resource_len(dev->pdev, 0) / 2);
|
|
|
|
|
2015-03-27 19:07:33 +08:00
|
|
|
/*
|
|
|
|
* On BXT writes larger than 64 bit to the GTT pagetable range will be
|
|
|
|
* dropped. For WC mappings in general we have 64 byte burst writes
|
|
|
|
* when the WC buffer is flushed, so we can't use it, but have to
|
|
|
|
* resort to an uncached mapping. The WC issue is easily caught by the
|
|
|
|
* readback check when writing GTT PTE entries.
|
|
|
|
*/
|
|
|
|
if (IS_BROXTON(dev))
|
|
|
|
dev_priv->gtt.gsm = ioremap_nocache(gtt_phys_addr, gtt_size);
|
|
|
|
else
|
|
|
|
dev_priv->gtt.gsm = ioremap_wc(gtt_phys_addr, gtt_size);
|
2013-11-05 11:32:22 +08:00
|
|
|
if (!dev_priv->gtt.gsm) {
|
|
|
|
DRM_ERROR("Failed to map the gtt page table\n");
|
|
|
|
return -ENOMEM;
|
|
|
|
}
|
|
|
|
|
|
|
|
ret = setup_scratch_page(dev);
|
|
|
|
if (ret) {
|
|
|
|
DRM_ERROR("Scratch setup failed\n");
|
|
|
|
/* iounmap will also get called at remove, but meh */
|
|
|
|
iounmap(dev_priv->gtt.gsm);
|
|
|
|
}
|
|
|
|
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2013-11-05 11:56:49 +08:00
|
|
|
/* The GGTT and PPGTT need a private PPAT setup in order to handle cacheability
|
|
|
|
* bits. When using advanced contexts each context stores its own PAT, but
|
|
|
|
* writing this data shouldn't be harmful even in those cases. */
|
2014-04-09 18:28:01 +08:00
|
|
|
static void bdw_setup_private_ppat(struct drm_i915_private *dev_priv)
|
2013-11-05 11:56:49 +08:00
|
|
|
{
|
|
|
|
uint64_t pat;
|
|
|
|
|
|
|
|
pat = GEN8_PPAT(0, GEN8_PPAT_WB | GEN8_PPAT_LLC) | /* for normal objects, no eLLC */
|
|
|
|
GEN8_PPAT(1, GEN8_PPAT_WC | GEN8_PPAT_LLCELLC) | /* for something pointing to ptes? */
|
|
|
|
GEN8_PPAT(2, GEN8_PPAT_WT | GEN8_PPAT_LLCELLC) | /* for scanout with eLLC */
|
|
|
|
GEN8_PPAT(3, GEN8_PPAT_UC) | /* Uncached objects, mostly for scanout */
|
|
|
|
GEN8_PPAT(4, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0)) |
|
|
|
|
GEN8_PPAT(5, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1)) |
|
|
|
|
GEN8_PPAT(6, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2)) |
|
|
|
|
GEN8_PPAT(7, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3));
|
|
|
|
|
2014-11-06 08:56:36 +08:00
|
|
|
if (!USES_PPGTT(dev_priv->dev))
|
|
|
|
/* Spec: "For GGTT, there is NO pat_sel[2:0] from the entry,
|
|
|
|
* so RTL will always use the value corresponding to
|
|
|
|
* pat_sel = 000".
|
|
|
|
* So let's disable cache for GGTT to avoid screen corruptions.
|
|
|
|
* MOCS still can be used though.
|
|
|
|
* - System agent ggtt writes (i.e. cpu gtt mmaps) already work
|
|
|
|
* before this patch, i.e. the same uncached + snooping access
|
|
|
|
* like on gen6/7 seems to be in effect.
|
|
|
|
* - So this just fixes blitter/render access. Again it looks
|
|
|
|
* like it's not just uncached access, but uncached + snooping.
|
|
|
|
* So we can still hold onto all our assumptions wrt cpu
|
|
|
|
* clflushing on LLC machines.
|
|
|
|
*/
|
|
|
|
pat = GEN8_PPAT(0, GEN8_PPAT_UC);
|
|
|
|
|
2013-11-05 11:56:49 +08:00
|
|
|
/* XXX: spec defines this as 2 distinct registers. It's unclear if a 64b
|
|
|
|
* write would work. */
|
|
|
|
I915_WRITE(GEN8_PRIVATE_PAT, pat);
|
|
|
|
I915_WRITE(GEN8_PRIVATE_PAT + 4, pat >> 32);
|
|
|
|
}
|
|
|
|
|
2014-04-09 18:28:01 +08:00
|
|
|
static void chv_setup_private_ppat(struct drm_i915_private *dev_priv)
|
|
|
|
{
|
|
|
|
uint64_t pat;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Map WB on BDW to snooped on CHV.
|
|
|
|
*
|
|
|
|
* Only the snoop bit has meaning for CHV, the rest is
|
|
|
|
* ignored.
|
|
|
|
*
|
2014-11-15 03:02:44 +08:00
|
|
|
* The hardware will never snoop for certain types of accesses:
|
|
|
|
* - CPU GTT (GMADR->GGTT->no snoop->memory)
|
|
|
|
* - PPGTT page tables
|
|
|
|
* - some other special cycles
|
|
|
|
*
|
|
|
|
* As with BDW, we also need to consider the following for GT accesses:
|
|
|
|
* "For GGTT, there is NO pat_sel[2:0] from the entry,
|
|
|
|
* so RTL will always use the value corresponding to
|
|
|
|
* pat_sel = 000".
|
|
|
|
* Which means we must set the snoop bit in PAT entry 0
|
|
|
|
* in order to keep the global status page working.
|
2014-04-09 18:28:01 +08:00
|
|
|
*/
|
|
|
|
pat = GEN8_PPAT(0, CHV_PPAT_SNOOP) |
|
|
|
|
GEN8_PPAT(1, 0) |
|
|
|
|
GEN8_PPAT(2, 0) |
|
|
|
|
GEN8_PPAT(3, 0) |
|
|
|
|
GEN8_PPAT(4, CHV_PPAT_SNOOP) |
|
|
|
|
GEN8_PPAT(5, CHV_PPAT_SNOOP) |
|
|
|
|
GEN8_PPAT(6, CHV_PPAT_SNOOP) |
|
|
|
|
GEN8_PPAT(7, CHV_PPAT_SNOOP);
|
|
|
|
|
|
|
|
I915_WRITE(GEN8_PRIVATE_PAT, pat);
|
|
|
|
I915_WRITE(GEN8_PRIVATE_PAT + 4, pat >> 32);
|
|
|
|
}
|
|
|
|
|
2013-11-05 11:32:22 +08:00
|
|
|
static int gen8_gmch_probe(struct drm_device *dev,
|
|
|
|
size_t *gtt_total,
|
|
|
|
size_t *stolen,
|
|
|
|
phys_addr_t *mappable_base,
|
|
|
|
unsigned long *mappable_end)
|
|
|
|
{
|
|
|
|
struct drm_i915_private *dev_priv = dev->dev_private;
|
|
|
|
unsigned int gtt_size;
|
|
|
|
u16 snb_gmch_ctl;
|
|
|
|
int ret;
|
|
|
|
|
|
|
|
/* TODO: We're not aware of mappable constraints on gen8 yet */
|
|
|
|
*mappable_base = pci_resource_start(dev->pdev, 2);
|
|
|
|
*mappable_end = pci_resource_len(dev->pdev, 2);
|
|
|
|
|
|
|
|
if (!pci_set_dma_mask(dev->pdev, DMA_BIT_MASK(39)))
|
|
|
|
pci_set_consistent_dma_mask(dev->pdev, DMA_BIT_MASK(39));
|
|
|
|
|
|
|
|
pci_read_config_word(dev->pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);
|
|
|
|
|
2014-01-10 02:02:46 +08:00
|
|
|
if (INTEL_INFO(dev)->gen >= 9) {
|
|
|
|
*stolen = gen9_get_stolen_size(snb_gmch_ctl);
|
|
|
|
gtt_size = gen8_get_total_gtt_size(snb_gmch_ctl);
|
|
|
|
} else if (IS_CHERRYVIEW(dev)) {
|
2014-05-09 03:19:40 +08:00
|
|
|
*stolen = chv_get_stolen_size(snb_gmch_ctl);
|
|
|
|
gtt_size = chv_get_total_gtt_size(snb_gmch_ctl);
|
|
|
|
} else {
|
|
|
|
*stolen = gen8_get_stolen_size(snb_gmch_ctl);
|
|
|
|
gtt_size = gen8_get_total_gtt_size(snb_gmch_ctl);
|
|
|
|
}
|
2013-11-05 11:32:22 +08:00
|
|
|
|
2015-03-17 00:00:54 +08:00
|
|
|
*gtt_total = (gtt_size / sizeof(gen8_pte_t)) << PAGE_SHIFT;
|
2013-11-05 11:32:22 +08:00
|
|
|
|
2015-03-17 17:39:31 +08:00
|
|
|
if (IS_CHERRYVIEW(dev) || IS_BROXTON(dev))
|
2014-04-09 18:28:01 +08:00
|
|
|
chv_setup_private_ppat(dev_priv);
|
|
|
|
else
|
|
|
|
bdw_setup_private_ppat(dev_priv);
|
2013-11-05 11:56:49 +08:00
|
|
|
|
2013-11-05 11:32:22 +08:00
|
|
|
ret = ggtt_probe_common(dev, gtt_size);
|
|
|
|
|
2013-11-03 12:07:18 +08:00
|
|
|
dev_priv->gtt.base.clear_range = gen8_ggtt_clear_range;
|
|
|
|
dev_priv->gtt.base.insert_entries = gen8_ggtt_insert_entries;
|
2015-04-14 23:35:12 +08:00
|
|
|
dev_priv->gtt.base.bind_vma = ggtt_bind_vma;
|
|
|
|
dev_priv->gtt.base.unbind_vma = ggtt_unbind_vma;
|
2013-11-05 11:32:22 +08:00
|
|
|
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2013-01-25 05:49:57 +08:00
|
|
|
static int gen6_gmch_probe(struct drm_device *dev,
|
|
|
|
size_t *gtt_total,
|
2013-02-09 03:32:47 +08:00
|
|
|
size_t *stolen,
|
|
|
|
phys_addr_t *mappable_base,
|
|
|
|
unsigned long *mappable_end)
|
2012-11-05 01:21:27 +08:00
|
|
|
{
|
|
|
|
struct drm_i915_private *dev_priv = dev->dev_private;
|
2013-01-25 05:49:57 +08:00
|
|
|
unsigned int gtt_size;
|
2012-11-05 01:21:27 +08:00
|
|
|
u16 snb_gmch_ctl;
|
|
|
|
int ret;
|
|
|
|
|
2013-02-09 03:32:47 +08:00
|
|
|
*mappable_base = pci_resource_start(dev->pdev, 2);
|
|
|
|
*mappable_end = pci_resource_len(dev->pdev, 2);
|
|
|
|
|
2013-01-25 05:49:57 +08:00
|
|
|
/* 64/512MB is the current min/max we actually know of, but this is just
|
|
|
|
* a coarse sanity check.
|
2012-11-05 01:21:27 +08:00
|
|
|
*/
|
2013-02-09 03:32:47 +08:00
|
|
|
if ((*mappable_end < (64<<20) || (*mappable_end > (512<<20)))) {
|
2013-01-25 05:49:57 +08:00
|
|
|
DRM_ERROR("Unknown GMADR size (%lx)\n",
|
|
|
|
dev_priv->gtt.mappable_end);
|
|
|
|
return -ENXIO;
|
2012-11-05 01:21:27 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
if (!pci_set_dma_mask(dev->pdev, DMA_BIT_MASK(40)))
|
|
|
|
pci_set_consistent_dma_mask(dev->pdev, DMA_BIT_MASK(40));
|
|
|
|
pci_read_config_word(dev->pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);
|
|
|
|
|
Revert "drm/i915: Calculate correct stolen size for GEN7+"
This reverts commit 03752f5b7b77b95d83479885040950fba1250850.
This revert requires a bit of explanation on how I understand things
work. Internally the architects/designers decide how the stolen encoding
works. We put it in a doc. BIOS writers take these docs and implement
it. Driver writers read the doc too, and read the value left by the BIOS
writers, and then we make magic.
The failing here is that in the docs we had[1] contained two different
definitions for this register for Gen7. (We have both a PCI register,
and an MMIO, and each of these were different). At the time [2] of
03752f5, we asked the architects what the correct value should be; but
that doesn't match the reality (BIOS) unfortunately.
So on all machines I can get my hands on, this revert is the right thing
to do. I've also worked with the product group to confirm that they
agree this revert is what we should do. People using HW made my "people"
who both write their own BIOS, and have access to our docs (Apple?).
Investigations are still ongoing about whether we need to add a list
of machines needing special handling, but this patch should be the
right thing for pretty much everyone.
[1] The docs are still wrong on this one. Now instead of two registers with
two definitions, we have one register with BOTH definitions, progress?
[2] The open source PRMs have the "wrong" definitions in chapter Volume
1 part6, section 1.1.12.
This digging was inspired by Paulo.
Cc: Paulo Zanoni <przanoni@gmail.com>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Acked-by: Jesse Barnes <jbarnes@virtuousgeek.org>
[danvet: Augment the patch saying that it's still a bit unclear
whether there are any machines out there with "wrong" firmware and
whether we need to add a list to handle them specially.]
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2013-05-02 02:00:34 +08:00
|
|
|
*stolen = gen6_get_stolen_size(snb_gmch_ctl);
|
2013-04-09 09:43:47 +08:00
|
|
|
|
2013-11-05 11:32:22 +08:00
|
|
|
gtt_size = gen6_get_total_gtt_size(snb_gmch_ctl);
|
2015-03-17 00:00:54 +08:00
|
|
|
*gtt_total = (gtt_size / sizeof(gen6_pte_t)) << PAGE_SHIFT;
|
2012-11-05 01:21:27 +08:00
|
|
|
|
2013-11-05 11:32:22 +08:00
|
|
|
ret = ggtt_probe_common(dev, gtt_size);
|
2012-11-05 01:21:27 +08:00
|
|
|
|
2013-07-17 07:50:05 +08:00
|
|
|
dev_priv->gtt.base.clear_range = gen6_ggtt_clear_range;
|
|
|
|
dev_priv->gtt.base.insert_entries = gen6_ggtt_insert_entries;
|
2015-04-14 23:35:12 +08:00
|
|
|
dev_priv->gtt.base.bind_vma = ggtt_bind_vma;
|
|
|
|
dev_priv->gtt.base.unbind_vma = ggtt_unbind_vma;
|
2013-01-25 06:44:55 +08:00
|
|
|
|
2012-11-05 01:21:27 +08:00
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2013-07-17 07:50:05 +08:00
|
|
|
static void gen6_gmch_remove(struct i915_address_space *vm)
|
2012-11-05 01:21:27 +08:00
|
|
|
{
|
2013-07-17 07:50:05 +08:00
|
|
|
|
|
|
|
struct i915_gtt *gtt = container_of(vm, struct i915_gtt, base);
|
2013-11-26 01:54:43 +08:00
|
|
|
|
2013-07-17 07:50:05 +08:00
|
|
|
iounmap(gtt->gsm);
|
|
|
|
teardown_scratch_page(vm->dev);
|
2012-03-26 15:45:40 +08:00
|
|
|
}
|
2013-01-25 05:49:57 +08:00
|
|
|
|
|
|
|
static int i915_gmch_probe(struct drm_device *dev,
|
|
|
|
size_t *gtt_total,
|
2013-02-09 03:32:47 +08:00
|
|
|
size_t *stolen,
|
|
|
|
phys_addr_t *mappable_base,
|
|
|
|
unsigned long *mappable_end)
|
2013-01-25 05:49:57 +08:00
|
|
|
{
|
|
|
|
struct drm_i915_private *dev_priv = dev->dev_private;
|
|
|
|
int ret;
|
|
|
|
|
|
|
|
ret = intel_gmch_probe(dev_priv->bridge_dev, dev_priv->dev->pdev, NULL);
|
|
|
|
if (!ret) {
|
|
|
|
DRM_ERROR("failed to set up gmch\n");
|
|
|
|
return -EIO;
|
|
|
|
}
|
|
|
|
|
2013-02-09 03:32:47 +08:00
|
|
|
intel_gtt_get(gtt_total, stolen, mappable_base, mappable_end);
|
2013-01-25 05:49:57 +08:00
|
|
|
|
|
|
|
dev_priv->gtt.do_idle_maps = needs_idle_maps(dev_priv->dev);
|
2015-04-14 23:35:25 +08:00
|
|
|
dev_priv->gtt.base.insert_entries = i915_ggtt_insert_entries;
|
2013-07-17 07:50:05 +08:00
|
|
|
dev_priv->gtt.base.clear_range = i915_ggtt_clear_range;
|
2015-04-14 23:35:25 +08:00
|
|
|
dev_priv->gtt.base.bind_vma = ggtt_bind_vma;
|
|
|
|
dev_priv->gtt.base.unbind_vma = ggtt_unbind_vma;
|
2013-01-25 05:49:57 +08:00
|
|
|
|
2013-12-30 20:16:15 +08:00
|
|
|
if (unlikely(dev_priv->gtt.do_idle_maps))
|
|
|
|
DRM_INFO("applying Ironlake quirks for intel_iommu\n");
|
|
|
|
|
2013-01-25 05:49:57 +08:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2013-07-17 07:50:05 +08:00
|
|
|
/*
 * GGTT cleanup hook paired with i915_gmch_probe(): hands teardown to the
 * intel-gtt helper.  @vm is unused.
 */
static void i915_gmch_remove(struct i915_address_space *vm)
{
	intel_gmch_remove();
}
|
|
|
|
|
|
|
|
int i915_gem_gtt_init(struct drm_device *dev)
|
|
|
|
{
|
|
|
|
struct drm_i915_private *dev_priv = dev->dev_private;
|
|
|
|
struct i915_gtt *gtt = &dev_priv->gtt;
|
|
|
|
int ret;
|
|
|
|
|
|
|
|
if (INTEL_INFO(dev)->gen <= 5) {
|
2013-06-28 07:30:20 +08:00
|
|
|
gtt->gtt_probe = i915_gmch_probe;
|
2013-07-17 07:50:05 +08:00
|
|
|
gtt->base.cleanup = i915_gmch_remove;
|
2013-11-05 11:32:22 +08:00
|
|
|
} else if (INTEL_INFO(dev)->gen < 8) {
|
2013-06-28 07:30:20 +08:00
|
|
|
gtt->gtt_probe = gen6_gmch_probe;
|
2013-07-17 07:50:05 +08:00
|
|
|
gtt->base.cleanup = gen6_gmch_remove;
|
2013-07-05 02:02:06 +08:00
|
|
|
if (IS_HASWELL(dev) && dev_priv->ellc_size)
|
2013-07-17 07:50:05 +08:00
|
|
|
gtt->base.pte_encode = iris_pte_encode;
|
2013-07-05 02:02:06 +08:00
|
|
|
else if (IS_HASWELL(dev))
|
2013-07-17 07:50:05 +08:00
|
|
|
gtt->base.pte_encode = hsw_pte_encode;
|
2013-06-28 07:30:20 +08:00
|
|
|
else if (IS_VALLEYVIEW(dev))
|
2013-07-17 07:50:05 +08:00
|
|
|
gtt->base.pte_encode = byt_pte_encode;
|
2013-08-06 20:17:02 +08:00
|
|
|
else if (INTEL_INFO(dev)->gen >= 7)
|
|
|
|
gtt->base.pte_encode = ivb_pte_encode;
|
2013-06-28 07:30:20 +08:00
|
|
|
else
|
2013-08-06 20:17:02 +08:00
|
|
|
gtt->base.pte_encode = snb_pte_encode;
|
2013-11-05 11:32:22 +08:00
|
|
|
} else {
|
|
|
|
dev_priv->gtt.gtt_probe = gen8_gmch_probe;
|
|
|
|
dev_priv->gtt.base.cleanup = gen6_gmch_remove;
|
2013-01-25 05:49:57 +08:00
|
|
|
}
|
|
|
|
|
2013-07-17 07:50:05 +08:00
|
|
|
ret = gtt->gtt_probe(dev, >t->base.total, >t->stolen_size,
|
2013-06-28 07:30:20 +08:00
|
|
|
>t->mappable_base, >t->mappable_end);
|
2013-01-25 06:45:00 +08:00
|
|
|
if (ret)
|
2013-01-25 05:49:57 +08:00
|
|
|
return ret;
|
|
|
|
|
2013-07-17 07:50:05 +08:00
|
|
|
gtt->base.dev = dev;
|
|
|
|
|
2013-01-25 05:49:57 +08:00
|
|
|
/* GMADR is the PCI mmio aperture into the global GTT. */
|
2013-07-17 07:50:05 +08:00
|
|
|
DRM_INFO("Memory usable by graphics device = %zdM\n",
|
|
|
|
gtt->base.total >> 20);
|
2013-06-28 07:30:20 +08:00
|
|
|
DRM_DEBUG_DRIVER("GMADR size = %ldM\n", gtt->mappable_end >> 20);
|
|
|
|
DRM_DEBUG_DRIVER("GTT stolen size = %zdM\n", gtt->stolen_size >> 20);
|
2014-03-31 22:23:04 +08:00
|
|
|
#ifdef CONFIG_INTEL_IOMMU
|
|
|
|
if (intel_iommu_gfx_mapped)
|
|
|
|
DRM_INFO("VT-d active for gfx access\n");
|
|
|
|
#endif
|
2014-04-29 17:53:58 +08:00
|
|
|
/*
|
|
|
|
* i915.enable_ppgtt is read-only, so do an early pass to validate the
|
|
|
|
* user's requested state against the hardware/driver capabilities. We
|
|
|
|
* do this now so that we can print out any log messages once rather
|
|
|
|
* than every time we check intel_enable_ppgtt().
|
|
|
|
*/
|
|
|
|
i915.enable_ppgtt = sanitize_enable_ppgtt(dev, i915.enable_ppgtt);
|
|
|
|
DRM_DEBUG_DRIVER("ppgtt mode: %i\n", i915.enable_ppgtt);
|
2013-01-25 05:49:57 +08:00
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
drm/i915: Create bind/unbind abstraction for VMAs
To sum up what goes on here, we abstract the vma binding, similarly to
the previous object binding. This helps for distinguishing legacy
binding, versus modern binding. To keep the code churn as minimal as
possible, I am leaving in insert_entries(). It serves as the per
platform pte writing basically. bind_vma and insert_entries do share a
lot of similarities, and I did have designs to combine the two, but as
mentioned already... too much churn in an already massive patchset.
What follows are the 3 commits which existed discretely in the original
submissions. Upon rebasing on Broadwell support, it became clear that
separation was not good, and only made for more error prone code. Below
are the 3 commit messages with all their history.
drm/i915: Add bind/unbind object functions to VMA
drm/i915: Use the new vm [un]bind functions
drm/i915: reduce vm->insert_entries() usage
drm/i915: Add bind/unbind object functions to VMA
As we plumb the code with more VM information, it has become more
obvious that the easiest way to deal with bind and unbind is to simply
put the function pointers in the vm, and let those choose the correct
way to handle the page table updates. This change allows many places in
the code to simply be vm->bind, and not have to worry about
distinguishing PPGTT vs GGTT.
Notice that this patch has no impact on functionality. I've decided to
save the actual change until the next patch because I think it's easier
to review that way. I'm happy to squash the two, or let Daniel do it on
merge.
v2:
Make ggtt handle the quirky aliasing ppgtt
Add flags to bind object to support above
Don't ever call bind/unbind directly for PPGTT until we have real, full
PPGTT (use NULLs to assert this)
Make sure we rebind the ggtt if there already is a ggtt binding. This
happens on set cache levels.
Use VMA for bind/unbind (Daniel, Ben)
v3: Reorganize ggtt_vma_bind to be more concise and easier to read
(Ville). Change logic in unbind to only unbind ggtt when there is a
global mapping, and to remove a redundant check if the aliasing ppgtt
exists.
v4: Make the bind function a bit smarter about the cache levels to avoid
unnecessary multiple remaps. "I accept it is a wart, I think unifying
the pin_vma / bind_vma could be unified later" (Chris)
Removed the git notes, and put version info here. (Daniel)
v5: Update the comment to not suck (Chris)
v6:
Move bind/unbind to the VMA. It makes more sense in the VMA structure
(always has, but I was previously lazy). With this change, it will allow
us to keep a distinct insert_entries.
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
drm/i915: Use the new vm [un]bind functions
Building on the last patch which created the new function pointers in
the VM for bind/unbind, here we actually put those new function pointers
to use.
Split out as a separate patch to aid in review. I'm fine with squashing
into the previous patch if people request it.
v2: Updated to address the smart ggtt which can do aliasing as needed
Make sure we bind to global gtt when mappable and fenceable. I thought
we could get away without this initially, but we cannot.
v3: Make the global GTT binding explicitly use the ggtt VM for
bind_vma(). While at it, use the new ggtt_vma helper (Chris)
At this point the original mailing list thread diverges. ie.
v4^:
use target_obj instead of obj for gen6 relocate_entry
vma->bind_vma() can be called safely during pin. So simply do that
instead of the complicated conditionals.
Don't restore PPGTT bound objects on resume path
Bug fix in resume path for globally bound Bos
Properly handle secure dispatch
Rebased on vma bind/unbind conversion
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
drm/i915: reduce vm->insert_entries() usage
FKA: drm/i915: eliminate vm->insert_entries()
With bind/unbind function pointers in place, we no longer need
insert_entries. We could, and want, to remove clear_range, however it's
not totally easy at this point. Since it's used in a couple of places
still that don't only deal in objects: setup, ppgtt init, and restore
gtt mappings.
v2: Don't actually remove insert_entries, just limit its usage. It will
be useful when we introduce gen8. It will always be called from the vma
bind/unbind.
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> (v1)
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2013-12-07 06:10:56 +08:00
|
|
|
|
2015-04-14 23:35:23 +08:00
|
|
|
void i915_gem_restore_gtt_mappings(struct drm_device *dev)
|
|
|
|
{
|
|
|
|
struct drm_i915_private *dev_priv = dev->dev_private;
|
|
|
|
struct drm_i915_gem_object *obj;
|
|
|
|
struct i915_address_space *vm;
|
|
|
|
|
|
|
|
i915_check_and_clear_faults(dev);
|
|
|
|
|
|
|
|
/* First fill our portion of the GTT with scratch pages */
|
|
|
|
dev_priv->gtt.base.clear_range(&dev_priv->gtt.base,
|
|
|
|
dev_priv->gtt.base.start,
|
|
|
|
dev_priv->gtt.base.total,
|
|
|
|
true);
|
|
|
|
|
|
|
|
list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
|
|
|
|
struct i915_vma *vma = i915_gem_obj_to_vma(obj,
|
|
|
|
&dev_priv->gtt.base);
|
|
|
|
if (!vma)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
i915_gem_clflush_object(obj, obj->pin_display);
|
|
|
|
WARN_ON(i915_vma_bind(vma, obj->cache_level, PIN_UPDATE));
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
if (INTEL_INFO(dev)->gen >= 8) {
|
|
|
|
if (IS_CHERRYVIEW(dev) || IS_BROXTON(dev))
|
|
|
|
chv_setup_private_ppat(dev_priv);
|
|
|
|
else
|
|
|
|
bdw_setup_private_ppat(dev_priv);
|
|
|
|
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (USES_PPGTT(dev)) {
|
|
|
|
list_for_each_entry(vm, &dev_priv->vm_list, global_link) {
|
|
|
|
/* TODO: Perhaps it shouldn't be gen6 specific */
|
|
|
|
|
|
|
|
struct i915_hw_ppgtt *ppgtt =
|
|
|
|
container_of(vm, struct i915_hw_ppgtt,
|
|
|
|
base);
|
|
|
|
|
|
|
|
if (i915_is_ggtt(vm))
|
|
|
|
ppgtt = dev_priv->mm.aliasing_ppgtt;
|
|
|
|
|
|
|
|
gen6_write_page_range(dev_priv, &ppgtt->pd,
|
|
|
|
0, ppgtt->base.total);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
i915_ggtt_flush(dev_priv);
|
|
|
|
}
|
|
|
|
|
2015-03-16 20:11:13 +08:00
|
|
|
static struct i915_vma *
|
|
|
|
__i915_gem_vma_create(struct drm_i915_gem_object *obj,
|
|
|
|
struct i915_address_space *vm,
|
|
|
|
const struct i915_ggtt_view *ggtt_view)
|
drm/i915: Create bind/unbind abstraction for VMAs
To sum up what goes on here, we abstract the vma binding, similarly to
the previous object binding. This helps for distinguishing legacy
binding, versus modern binding. To keep the code churn as minimal as
possible, I am leaving in insert_entries(). It serves as the per
platform pte writing basically. bind_vma and insert_entries do share a
lot of similarities, and I did have designs to combine the two, but as
mentioned already... too much churn in an already massive patchset.
What follows are the 3 commits which existed discretely in the original
submissions. Upon rebasing on Broadwell support, it became clear that
separation was not good, and only made for more error prone code. Below
are the 3 commit messages with all their history.
drm/i915: Add bind/unbind object functions to VMA
drm/i915: Use the new vm [un]bind functions
drm/i915: reduce vm->insert_entries() usage
drm/i915: Add bind/unbind object functions to VMA
As we plumb the code with more VM information, it has become more
obvious that the easiest way to deal with bind and unbind is to simply
put the function pointers in the vm, and let those choose the correct
way to handle the page table updates. This change allows many places in
the code to simply be vm->bind, and not have to worry about
distinguishing PPGTT vs GGTT.
Notice that this patch has no impact on functionality. I've decided to
save the actual change until the next patch because I think it's easier
to review that way. I'm happy to squash the two, or let Daniel do it on
merge.
v2:
Make ggtt handle the quirky aliasing ppgtt
Add flags to bind object to support above
Don't ever call bind/unbind directly for PPGTT until we have real, full
PPGTT (use NULLs to assert this)
Make sure we rebind the ggtt if there already is a ggtt binding. This
happens on set cache levels.
Use VMA for bind/unbind (Daniel, Ben)
v3: Reorganize ggtt_vma_bind to be more concise and easier to read
(Ville). Change logic in unbind to only unbind ggtt when there is a
global mapping, and to remove a redundant check if the aliasing ppgtt
exists.
v4: Make the bind function a bit smarter about the cache levels to avoid
unnecessary multiple remaps. "I accept it is a wart, I think unifying
the pin_vma / bind_vma could be unified later" (Chris)
Removed the git notes, and put version info here. (Daniel)
v5: Update the comment to not suck (Chris)
v6:
Move bind/unbind to the VMA. It makes more sense in the VMA structure
(always has, but I was previously lazy). With this change, it will allow
us to keep a distinct insert_entries.
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
drm/i915: Use the new vm [un]bind functions
Building on the last patch which created the new function pointers in
the VM for bind/unbind, here we actually put those new function pointers
to use.
Split out as a separate patch to aid in review. I'm fine with squashing
into the previous patch if people request it.
v2: Updated to address the smart ggtt which can do aliasing as needed
Make sure we bind to global gtt when mappable and fenceable. I thought
we could get away without this initially, but we cannot.
v3: Make the global GTT binding explicitly use the ggtt VM for
bind_vma(). While at it, use the new ggtt_vma helper (Chris)
At this point the original mailing list thread diverges. ie.
v4^:
use target_obj instead of obj for gen6 relocate_entry
vma->bind_vma() can be called safely during pin. So simply do that
instead of the complicated conditionals.
Don't restore PPGTT bound objects on resume path
Bug fix in resume path for globally bound Bos
Properly handle secure dispatch
Rebased on vma bind/unbind conversion
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
drm/i915: reduce vm->insert_entries() usage
FKA: drm/i915: eliminate vm->insert_entries()
With bind/unbind function pointers in place, we no longer need
insert_entries. We could, and want, to remove clear_range, however it's
not totally easy at this point. Since it's used in a couple of places
still that don't only deal in objects: setup, ppgtt init, and restore
gtt mappings.
v2: Don't actually remove insert_entries, just limit its usage. It will
be useful when we introduce gen8. It will always be called from the vma
bind/unbind.
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> (v1)
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2013-12-07 06:10:56 +08:00
|
|
|
{
|
2015-03-18 16:21:58 +08:00
|
|
|
struct i915_vma *vma;
|
drm/i915: Create bind/unbind abstraction for VMAs
To sum up what goes on here, we abstract the vma binding, similarly to
the previous object binding. This helps for distinguishing legacy
binding, versus modern binding. To keep the code churn as minimal as
possible, I am leaving in insert_entries(). It serves as the per
platform pte writing basically. bind_vma and insert_entries do share a
lot of similarities, and I did have designs to combine the two, but as
mentioned already... too much churn in an already massive patchset.
What follows are the 3 commits which existed discretely in the original
submissions. Upon rebasing on Broadwell support, it became clear that
separation was not good, and only made for more error prone code. Below
are the 3 commit messages with all their history.
drm/i915: Add bind/unbind object functions to VMA
drm/i915: Use the new vm [un]bind functions
drm/i915: reduce vm->insert_entries() usage
drm/i915: Add bind/unbind object functions to VMA
As we plumb the code with more VM information, it has become more
obvious that the easiest way to deal with bind and unbind is to simply
put the function pointers in the vm, and let those choose the correct
way to handle the page table updates. This change allows many places in
the code to simply be vm->bind, and not have to worry about
distinguishing PPGTT vs GGTT.
Notice that this patch has no impact on functionality. I've decided to
save the actual change until the next patch because I think it's easier
to review that way. I'm happy to squash the two, or let Daniel do it on
merge.
v2:
Make ggtt handle the quirky aliasing ppgtt
Add flags to bind object to support above
Don't ever call bind/unbind directly for PPGTT until we have real, full
PPGTT (use NULLs to assert this)
Make sure we rebind the ggtt if there already is a ggtt binding. This
happens on set cache levels.
Use VMA for bind/unbind (Daniel, Ben)
v3: Reorganize ggtt_vma_bind to be more concise and easier to read
(Ville). Change logic in unbind to only unbind ggtt when there is a
global mapping, and to remove a redundant check if the aliasing ppgtt
exists.
v4: Make the bind function a bit smarter about the cache levels to avoid
unnecessary multiple remaps. "I accept it is a wart, I think unifying
the pin_vma / bind_vma could be unified later" (Chris)
Removed the git notes, and put version info here. (Daniel)
v5: Update the comment to not suck (Chris)
v6:
Move bind/unbind to the VMA. It makes more sense in the VMA structure
(always has, but I was previously lazy). With this change, it will allow
us to keep a distinct insert_entries.
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
drm/i915: Use the new vm [un]bind functions
Building on the last patch which created the new function pointers in
the VM for bind/unbind, here we actually put those new function pointers
to use.
Split out as a separate patch to aid in review. I'm fine with squashing
into the previous patch if people request it.
v2: Updated to address the smart ggtt which can do aliasing as needed
Make sure we bind to global gtt when mappable and fenceable. I thought
we could get away without this initially, but we cannot.
v3: Make the global GTT binding explicitly use the ggtt VM for
bind_vma(). While at it, use the new ggtt_vma helper (Chris)
At this point the original mailing list thread diverges. ie.
v4^:
use target_obj instead of obj for gen6 relocate_entry
vma->bind_vma() can be called safely during pin. So simply do that
instead of the complicated conditionals.
Don't restore PPGTT bound objects on resume path
Bug fix in resume path for globally bound Bos
Properly handle secure dispatch
Rebased on vma bind/unbind conversion
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
drm/i915: reduce vm->insert_entries() usage
FKA: drm/i915: eliminate vm->insert_entries()
With bind/unbind function pointers in place, we no longer need
insert_entries. We could, and want, to remove clear_range, however it's
not totally easy at this point. Since it's used in a couple of places
still that don't only deal in objects: setup, ppgtt init, and restore
gtt mappings.
v2: Don't actually remove insert_entries, just limit its usage. It will
be useful when we introduce gen8. It will always be called from the vma
bind/unbind.
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> (v1)
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2013-12-07 06:10:56 +08:00
|
|
|
|
2015-03-16 20:11:13 +08:00
|
|
|
if (WARN_ON(i915_is_ggtt(vm) != !!ggtt_view))
|
|
|
|
return ERR_PTR(-EINVAL);
|
2015-04-07 23:20:58 +08:00
|
|
|
|
|
|
|
vma = kmem_cache_zalloc(to_i915(obj->base.dev)->vmas, GFP_KERNEL);
|
2015-03-18 16:21:58 +08:00
|
|
|
if (vma == NULL)
|
|
|
|
return ERR_PTR(-ENOMEM);
|
2015-03-16 20:11:13 +08:00
|
|
|
|
drm/i915: Create bind/unbind abstraction for VMAs
To sum up what goes on here, we abstract the vma binding, similarly to
the previous object binding. This helps for distinguishing legacy
binding, versus modern binding. To keep the code churn as minimal as
possible, I am leaving in insert_entries(). It serves as the per
platform pte writing basically. bind_vma and insert_entries do share a
lot of similarities, and I did have designs to combine the two, but as
mentioned already... too much churn in an already massive patchset.
What follows are the 3 commits which existed discretely in the original
submissions. Upon rebasing on Broadwell support, it became clear that
separation was not good, and only made for more error prone code. Below
are the 3 commit messages with all their history.
drm/i915: Add bind/unbind object functions to VMA
drm/i915: Use the new vm [un]bind functions
drm/i915: reduce vm->insert_entries() usage
drm/i915: Add bind/unbind object functions to VMA
As we plumb the code with more VM information, it has become more
obvious that the easiest way to deal with bind and unbind is to simply
put the function pointers in the vm, and let those choose the correct
way to handle the page table updates. This change allows many places in
the code to simply be vm->bind, and not have to worry about
distinguishing PPGTT vs GGTT.
Notice that this patch has no impact on functionality. I've decided to
save the actual change until the next patch because I think it's easier
to review that way. I'm happy to squash the two, or let Daniel do it on
merge.
v2:
Make ggtt handle the quirky aliasing ppgtt
Add flags to bind object to support above
Don't ever call bind/unbind directly for PPGTT until we have real, full
PPGTT (use NULLs to assert this)
Make sure we rebind the ggtt if there already is a ggtt binding. This
happens on set cache levels.
Use VMA for bind/unbind (Daniel, Ben)
v3: Reorganize ggtt_vma_bind to be more concise and easier to read
(Ville). Change logic in unbind to only unbind ggtt when there is a
global mapping, and to remove a redundant check if the aliasing ppgtt
exists.
v4: Make the bind function a bit smarter about the cache levels to avoid
unnecessary multiple remaps. "I accept it is a wart, I think unifying
the pin_vma / bind_vma could be unified later" (Chris)
Removed the git notes, and put version info here. (Daniel)
v5: Update the comment to not suck (Chris)
v6:
Move bind/unbind to the VMA. It makes more sense in the VMA structure
(always has, but I was previously lazy). With this change, it will allow
us to keep a distinct insert_entries.
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
drm/i915: Use the new vm [un]bind functions
Building on the last patch which created the new function pointers in
the VM for bind/unbind, here we actually put those new function pointers
to use.
Split out as a separate patch to aid in review. I'm fine with squashing
into the previous patch if people request it.
v2: Updated to address the smart ggtt which can do aliasing as needed
Make sure we bind to global gtt when mappable and fenceable. I thought
we could get away without this initially, but we cannot.
v3: Make the global GTT binding explicitly use the ggtt VM for
bind_vma(). While at it, use the new ggtt_vma helper (Chris)
At this point the original mailing list thread diverges. ie.
v4^:
use target_obj instead of obj for gen6 relocate_entry
vma->bind_vma() can be called safely during pin. So simply do that
instead of the complicated conditionals.
Don't restore PPGTT bound objects on resume path
Bug fix in resume path for globally bound Bos
Properly handle secure dispatch
Rebased on vma bind/unbind conversion
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
drm/i915: reduce vm->insert_entries() usage
FKA: drm/i915: eliminate vm->insert_entries()
With bind/unbind function pointers in place, we no longer need
insert_entries. We could, and want, to remove clear_range, however it's
not totally easy at this point. Since it's used in a couple of places
still that don't only deal in objects: setup, ppgtt init, and restore
gtt mappings.
v2: Don't actually remove insert_entries, just limit its usage. It will
be useful when we introduce gen8. It will always be called from the vma
bind/unbind.
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> (v1)
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2013-12-07 06:10:56 +08:00
|
|
|
INIT_LIST_HEAD(&vma->vma_link);
|
|
|
|
INIT_LIST_HEAD(&vma->mm_list);
|
|
|
|
INIT_LIST_HEAD(&vma->exec_list);
|
|
|
|
vma->vm = vm;
|
|
|
|
vma->obj = obj;
|
|
|
|
|
2015-04-14 23:35:12 +08:00
|
|
|
if (i915_is_ggtt(vm))
|
2015-03-16 20:11:13 +08:00
|
|
|
vma->ggtt_view = *ggtt_view;
|
drm/i915: Create bind/unbind abstraction for VMAs
To sum up what goes on here, we abstract the vma binding, similarly to
the previous object binding. This helps for distinguishing legacy
binding, versus modern binding. To keep the code churn as minimal as
possible, I am leaving in insert_entries(). It serves as the per
platform pte writing basically. bind_vma and insert_entries do share a
lot of similarities, and I did have designs to combine the two, but as
mentioned already... too much churn in an already massive patchset.
What follows are the 3 commits which existed discretely in the original
submissions. Upon rebasing on Broadwell support, it became clear that
separation was not good, and only made for more error prone code. Below
are the 3 commit messages with all their history.
drm/i915: Add bind/unbind object functions to VMA
drm/i915: Use the new vm [un]bind functions
drm/i915: reduce vm->insert_entries() usage
drm/i915: Add bind/unbind object functions to VMA
As we plumb the code with more VM information, it has become more
obvious that the easiest way to deal with bind and unbind is to simply
put the function pointers in the vm, and let those choose the correct
way to handle the page table updates. This change allows many places in
the code to simply be vm->bind, and not have to worry about
distinguishing PPGTT vs GGTT.
Notice that this patch has no impact on functionality. I've decided to
save the actual change until the next patch because I think it's easier
to review that way. I'm happy to squash the two, or let Daniel do it on
merge.
v2:
Make ggtt handle the quirky aliasing ppgtt
Add flags to bind object to support above
Don't ever call bind/unbind directly for PPGTT until we have real, full
PPGTT (use NULLs to assert this)
Make sure we rebind the ggtt if there already is a ggtt binding. This
happens on set cache levels.
Use VMA for bind/unbind (Daniel, Ben)
v3: Reorganize ggtt_vma_bind to be more concise and easier to read
(Ville). Change logic in unbind to only unbind ggtt when there is a
global mapping, and to remove a redundant check if the aliasing ppgtt
exists.
v4: Make the bind function a bit smarter about the cache levels to avoid
unnecessary multiple remaps. "I accept it is a wart, I think unifying
the pin_vma / bind_vma could be unified later" (Chris)
Removed the git notes, and put version info here. (Daniel)
v5: Update the comment to not suck (Chris)
v6:
Move bind/unbind to the VMA. It makes more sense in the VMA structure
(always has, but I was previously lazy). With this change, it will allow
us to keep a distinct insert_entries.
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
drm/i915: Use the new vm [un]bind functions
Building on the last patch which created the new function pointers in
the VM for bind/unbind, here we actually put those new function pointers
to use.
Split out as a separate patch to aid in review. I'm fine with squashing
into the previous patch if people request it.
v2: Updated to address the smart ggtt which can do aliasing as needed
Make sure we bind to global gtt when mappable and fenceable. I thought
we could get away without this initially, but we cannot.
v3: Make the global GTT binding explicitly use the ggtt VM for
bind_vma(). While at it, use the new ggtt_vma helper (Chris)
At this point the original mailing list thread diverges. ie.
v4^:
use target_obj instead of obj for gen6 relocate_entry
vma->bind_vma() can be called safely during pin. So simply do that
instead of the complicated conditionals.
Don't restore PPGTT bound objects on resume path
Bug fix in resume path for globally bound Bos
Properly handle secure dispatch
Rebased on vma bind/unbind conversion
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
drm/i915: reduce vm->insert_entries() usage
FKA: drm/i915: eliminate vm->insert_entries()
With bind/unbind function pointers in place, we no longer need
insert_entries. We could, and want, to remove clear_range, however it's
not totally easy at this point. Since it's used in a couple of places
still that don't only deal in objects: setup, ppgtt init, and restore
gtt mappings.
v2: Don't actually remove insert_entries, just limit its usage. It will
be useful when we introduce gen8. It will always be called from the vma
bind/unbind.
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> (v1)
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2013-12-07 06:10:56 +08:00
|
|
|
|
2014-12-03 22:59:24 +08:00
|
|
|
list_add_tail(&vma->vma_link, &obj->vma_list);
|
|
|
|
if (!i915_is_ggtt(vm))
|
2014-08-19 22:49:41 +08:00
|
|
|
i915_ppgtt_get(i915_vm_to_ppgtt(vm));
|
drm/i915: Create bind/unbind abstraction for VMAs
To sum up what goes on here, we abstract the vma binding, similarly to
the previous object binding. This helps for distinguishing legacy
binding, versus modern binding. To keep the code churn as minimal as
possible, I am leaving in insert_entries(). It serves as the per
platform pte writing basically. bind_vma and insert_entries do share a
lot of similarities, and I did have designs to combine the two, but as
mentioned already... too much churn in an already massive patchset.
What follows are the 3 commits which existed discretely in the original
submissions. Upon rebasing on Broadwell support, it became clear that
separation was not good, and only made for more error prone code. Below
are the 3 commit messages with all their history.
drm/i915: Add bind/unbind object functions to VMA
drm/i915: Use the new vm [un]bind functions
drm/i915: reduce vm->insert_entries() usage
drm/i915: Add bind/unbind object functions to VMA
As we plumb the code with more VM information, it has become more
obvious that the easiest way to deal with bind and unbind is to simply
put the function pointers in the vm, and let those choose the correct
way to handle the page table updates. This change allows many places in
the code to simply be vm->bind, and not have to worry about
distinguishing PPGTT vs GGTT.
Notice that this patch has no impact on functionality. I've decided to
save the actual change until the next patch because I think it's easier
to review that way. I'm happy to squash the two, or let Daniel do it on
merge.
v2:
Make ggtt handle the quirky aliasing ppgtt
Add flags to bind object to support above
Don't ever call bind/unbind directly for PPGTT until we have real, full
PPGTT (use NULLs to assert this)
Make sure we rebind the ggtt if there already is a ggtt binding. This
happens on set cache levels.
Use VMA for bind/unbind (Daniel, Ben)
v3: Reorganize ggtt_vma_bind to be more concise and easier to read
(Ville). Change logic in unbind to only unbind ggtt when there is a
global mapping, and to remove a redundant check if the aliasing ppgtt
exists.
v4: Make the bind function a bit smarter about the cache levels to avoid
unnecessary multiple remaps. "I accept it is a wart, I think unifying
the pin_vma / bind_vma could be unified later" (Chris)
Removed the git notes, and put version info here. (Daniel)
v5: Update the comment to not suck (Chris)
v6:
Move bind/unbind to the VMA. It makes more sense in the VMA structure
(always has, but I was previously lazy). With this change, it will allow
us to keep a distinct insert_entries.
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
drm/i915: Use the new vm [un]bind functions
Building on the last patch which created the new function pointers in
the VM for bind/unbind, here we actually put those new function pointers
to use.
Split out as a separate patch to aid in review. I'm fine with squashing
into the previous patch if people request it.
v2: Updated to address the smart ggtt which can do aliasing as needed
Make sure we bind to global gtt when mappable and fenceable. I thought
we could get away without this initially, but we cannot.
v3: Make the global GTT binding explicitly use the ggtt VM for
bind_vma(). While at it, use the new ggtt_vma helper (Chris)
At this point the original mailing list thread diverges. ie.
v4^:
use target_obj instead of obj for gen6 relocate_entry
vma->bind_vma() can be called safely during pin. So simply do that
instead of the complicated conditionals.
Don't restore PPGTT bound objects on resume path
Bug fix in resume path for globally bound Bos
Properly handle secure dispatch
Rebased on vma bind/unbind conversion
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
drm/i915: reduce vm->insert_entries() usage
FKA: drm/i915: eliminate vm->insert_entries()
With bind/unbind function pointers in place, we no longer need
insert_entries. We could, and want, to remove clear_range, however it's
not totally easy at this point. Since it's used in a couple of places
still that don't only deal in objects: setup, ppgtt init, and restore
gtt mappings.
v2: Don't actually remove insert_entries, just limit its usage. It will
be useful when we introduce gen8. It will always be called from the vma
bind/unbind.
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> (v1)
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2013-12-07 06:10:56 +08:00
|
|
|
|
|
|
|
return vma;
|
|
|
|
}
|
|
|
|
|
|
|
|
struct i915_vma *
|
2015-03-16 20:11:13 +08:00
|
|
|
i915_gem_obj_lookup_or_create_vma(struct drm_i915_gem_object *obj,
|
|
|
|
struct i915_address_space *vm)
|
|
|
|
{
|
|
|
|
struct i915_vma *vma;
|
|
|
|
|
|
|
|
vma = i915_gem_obj_to_vma(obj, vm);
|
|
|
|
if (!vma)
|
|
|
|
vma = __i915_gem_vma_create(obj, vm,
|
|
|
|
i915_is_ggtt(vm) ? &i915_ggtt_view_normal : NULL);
|
|
|
|
|
|
|
|
return vma;
|
|
|
|
}
|
|
|
|
|
|
|
|
struct i915_vma *
|
|
|
|
i915_gem_obj_lookup_or_create_ggtt_vma(struct drm_i915_gem_object *obj,
|
2014-12-11 01:27:58 +08:00
|
|
|
const struct i915_ggtt_view *view)
|
drm/i915: Create bind/unbind abstraction for VMAs
To sum up what goes on here, we abstract the vma binding, similarly to
the previous object binding. This helps for distinguishing legacy
binding, versus modern binding. To keep the code churn as minimal as
possible, I am leaving in insert_entries(). It serves as the per
platform pte writing basically. bind_vma and insert_entries do share a
lot of similarities, and I did have designs to combine the two, but as
mentioned already... too much churn in an already massive patchset.
What follows are the 3 commits which existed discretely in the original
submissions. Upon rebasing on Broadwell support, it became clear that
separation was not good, and only made for more error prone code. Below
are the 3 commit messages with all their history.
drm/i915: Add bind/unbind object functions to VMA
drm/i915: Use the new vm [un]bind functions
drm/i915: reduce vm->insert_entries() usage
drm/i915: Add bind/unbind object functions to VMA
As we plumb the code with more VM information, it has become more
obvious that the easiest way to deal with bind and unbind is to simply
put the function pointers in the vm, and let those choose the correct
way to handle the page table updates. This change allows many places in
the code to simply be vm->bind, and not have to worry about
distinguishing PPGTT vs GGTT.
Notice that this patch has no impact on functionality. I've decided to
save the actual change until the next patch because I think it's easier
to review that way. I'm happy to squash the two, or let Daniel do it on
merge.
v2:
Make ggtt handle the quirky aliasing ppgtt
Add flags to bind object to support above
Don't ever call bind/unbind directly for PPGTT until we have real, full
PPGTT (use NULLs to assert this)
Make sure we rebind the ggtt if there already is a ggtt binding. This
happens on set cache levels.
Use VMA for bind/unbind (Daniel, Ben)
v3: Reorganize ggtt_vma_bind to be more concise and easier to read
(Ville). Change logic in unbind to only unbind ggtt when there is a
global mapping, and to remove a redundant check if the aliasing ppgtt
exists.
v4: Make the bind function a bit smarter about the cache levels to avoid
unnecessary multiple remaps. "I accept it is a wart, I think unifying
the pin_vma / bind_vma could be unified later" (Chris)
Removed the git notes, and put version info here. (Daniel)
v5: Update the comment to not suck (Chris)
v6:
Move bind/unbind to the VMA. It makes more sense in the VMA structure
(always has, but I was previously lazy). With this change, it will allow
us to keep a distinct insert_entries.
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
drm/i915: Use the new vm [un]bind functions
Building on the last patch which created the new function pointers in
the VM for bind/unbind, here we actually put those new function pointers
to use.
Split out as a separate patch to aid in review. I'm fine with squashing
into the previous patch if people request it.
v2: Updated to address the smart ggtt which can do aliasing as needed
Make sure we bind to global gtt when mappable and fenceable. I thought
we could get away without this initially, but we cannot.
v3: Make the global GTT binding explicitly use the ggtt VM for
bind_vma(). While at it, use the new ggtt_vma helper (Chris)
At this point the original mailing list thread diverges. ie.
v4^:
use target_obj instead of obj for gen6 relocate_entry
vma->bind_vma() can be called safely during pin. So simply do that
instead of the complicated conditionals.
Don't restore PPGTT bound objects on resume path
Bug fix in resume path for globally bound Bos
Properly handle secure dispatch
Rebased on vma bind/unbind conversion
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
drm/i915: reduce vm->insert_entries() usage
FKA: drm/i915: eliminate vm->insert_entries()
With bind/unbind function pointers in place, we no longer need
insert_entries. We could, and want, to remove clear_range, however it's
not totally easy at this point. Since it's used in a couple of places
still that don't only deal in objects: setup, ppgtt init, and restore
gtt mappings.
v2: Don't actually remove insert_entries, just limit its usage. It will
be useful when we introduce gen8. It will always be called from the vma
bind/unbind.
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> (v1)
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2013-12-07 06:10:56 +08:00
|
|
|
{
|
2015-03-16 20:11:13 +08:00
|
|
|
struct i915_address_space *ggtt = i915_obj_to_ggtt(obj);
|
drm/i915: Create bind/unbind abstraction for VMAs
To sum up what goes on here, we abstract the vma binding, similarly to
the previous object binding. This helps for distinguishing legacy
binding, versus modern binding. To keep the code churn as minimal as
possible, I am leaving in insert_entries(). It serves as the per
platform pte writing basically. bind_vma and insert_entries do share a
lot of similarities, and I did have designs to combine the two, but as
mentioned already... too much churn in an already massive patchset.
What follows are the 3 commits which existed discretely in the original
submissions. Upon rebasing on Broadwell support, it became clear that
separation was not good, and only made for more error prone code. Below
are the 3 commit messages with all their history.
drm/i915: Add bind/unbind object functions to VMA
drm/i915: Use the new vm [un]bind functions
drm/i915: reduce vm->insert_entries() usage
drm/i915: Add bind/unbind object functions to VMA
As we plumb the code with more VM information, it has become more
obvious that the easiest way to deal with bind and unbind is to simply
put the function pointers in the vm, and let those choose the correct
way to handle the page table updates. This change allows many places in
the code to simply be vm->bind, and not have to worry about
distinguishing PPGTT vs GGTT.
Notice that this patch has no impact on functionality. I've decided to
save the actual change until the next patch because I think it's easier
to review that way. I'm happy to squash the two, or let Daniel do it on
merge.
v2:
Make ggtt handle the quirky aliasing ppgtt
Add flags to bind object to support above
Don't ever call bind/unbind directly for PPGTT until we have real, full
PPGTT (use NULLs to assert this)
Make sure we rebind the ggtt if there already is a ggtt binding. This
happens on set cache levels.
Use VMA for bind/unbind (Daniel, Ben)
v3: Reorganize ggtt_vma_bind to be more concise and easier to read
(Ville). Change logic in unbind to only unbind ggtt when there is a
global mapping, and to remove a redundant check if the aliasing ppgtt
exists.
v4: Make the bind function a bit smarter about the cache levels to avoid
unnecessary multiple remaps. "I accept it is a wart, I think unifying
the pin_vma / bind_vma could be unified later" (Chris)
Removed the git notes, and put version info here. (Daniel)
v5: Update the comment to not suck (Chris)
v6:
Move bind/unbind to the VMA. It makes more sense in the VMA structure
(always has, but I was previously lazy). With this change, it will allow
us to keep a distinct insert_entries.
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
drm/i915: Use the new vm [un]bind functions
Building on the last patch which created the new function pointers in
the VM for bind/unbind, here we actually put those new function pointers
to use.
Split out as a separate patch to aid in review. I'm fine with squashing
into the previous patch if people request it.
v2: Updated to address the smart ggtt which can do aliasing as needed
Make sure we bind to global gtt when mappable and fenceable. I thought
we could get away without this initially, but we cannot.
v3: Make the global GTT binding explicitly use the ggtt VM for
bind_vma(). While at it, use the new ggtt_vma helper (Chris)
At this point the original mailing list thread diverges. ie.
v4^:
use target_obj instead of obj for gen6 relocate_entry
vma->bind_vma() can be called safely during pin. So simply do that
instead of the complicated conditionals.
Don't restore PPGTT bound objects on resume path
Bug fix in resume path for globally bound Bos
Properly handle secure dispatch
Rebased on vma bind/unbind conversion
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
drm/i915: reduce vm->insert_entries() usage
FKA: drm/i915: eliminate vm->insert_entries()
With bind/unbind function pointers in place, we no longer need
insert_entries. We could, and want, to remove clear_range, however it's
not totally easy at this point. Since it's used in a couple of places
still that don't only deal in objects: setup, ppgtt init, and restore
gtt mappings.
v2: Don't actually remove insert_entries, just limit its usage. It will
be useful when we introduce gen8. It will always be called from the vma
bind/unbind.
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> (v1)
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2013-12-07 06:10:56 +08:00
|
|
|
struct i915_vma *vma;
|
|
|
|
|
2015-03-16 20:11:13 +08:00
|
|
|
if (WARN_ON(!view))
|
|
|
|
return ERR_PTR(-EINVAL);
|
|
|
|
|
|
|
|
vma = i915_gem_obj_to_ggtt_view(obj, view);
|
|
|
|
|
|
|
|
if (IS_ERR(vma))
|
|
|
|
return vma;
|
|
|
|
|
drm/i915: Create bind/unbind abstraction for VMAs
To sum up what goes on here, we abstract the vma binding, similarly to
the previous object binding. This helps for distinguishing legacy
binding, versus modern binding. To keep the code churn as minimal as
possible, I am leaving in insert_entries(). It serves as the per
platform pte writing basically. bind_vma and insert_entries do share a
lot of similarities, and I did have designs to combine the two, but as
mentioned already... too much churn in an already massive patchset.
What follows are the 3 commits which existed discretely in the original
submissions. Upon rebasing on Broadwell support, it became clear that
separation was not good, and only made for more error prone code. Below
are the 3 commit messages with all their history.
drm/i915: Add bind/unbind object functions to VMA
drm/i915: Use the new vm [un]bind functions
drm/i915: reduce vm->insert_entries() usage
drm/i915: Add bind/unbind object functions to VMA
As we plumb the code with more VM information, it has become more
obvious that the easiest way to deal with bind and unbind is to simply
put the function pointers in the vm, and let those choose the correct
way to handle the page table updates. This change allows many places in
the code to simply be vm->bind, and not have to worry about
distinguishing PPGTT vs GGTT.
Notice that this patch has no impact on functionality. I've decided to
save the actual change until the next patch because I think it's easier
to review that way. I'm happy to squash the two, or let Daniel do it on
merge.
v2:
Make ggtt handle the quirky aliasing ppgtt
Add flags to bind object to support above
Don't ever call bind/unbind directly for PPGTT until we have real, full
PPGTT (use NULLs to assert this)
Make sure we rebind the ggtt if there already is a ggtt binding. This
happens on set cache levels.
Use VMA for bind/unbind (Daniel, Ben)
v3: Reorganize ggtt_vma_bind to be more concise and easier to read
(Ville). Change logic in unbind to only unbind ggtt when there is a
global mapping, and to remove a redundant check if the aliasing ppgtt
exists.
v4: Make the bind function a bit smarter about the cache levels to avoid
unnecessary multiple remaps. "I accept it is a wart, I think unifying
the pin_vma / bind_vma could be unified later" (Chris)
Removed the git notes, and put version info here. (Daniel)
v5: Update the comment to not suck (Chris)
v6:
Move bind/unbind to the VMA. It makes more sense in the VMA structure
(always has, but I was previously lazy). With this change, it will allow
us to keep a distinct insert_entries.
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
drm/i915: Use the new vm [un]bind functions
Building on the last patch which created the new function pointers in
the VM for bind/unbind, here we actually put those new function pointers
to use.
Split out as a separate patch to aid in review. I'm fine with squashing
into the previous patch if people request it.
v2: Updated to address the smart ggtt which can do aliasing as needed
Make sure we bind to global gtt when mappable and fenceable. I thought
we could get away without this initially, but we cannot.
v3: Make the global GTT binding explicitly use the ggtt VM for
bind_vma(). While at it, use the new ggtt_vma helper (Chris)
At this point the original mailing list thread diverges. ie.
v4^:
use target_obj instead of obj for gen6 relocate_entry
vma->bind_vma() can be called safely during pin. So simply do that
instead of the complicated conditionals.
Don't restore PPGTT bound objects on resume path
Bug fix in resume path for globally bound Bos
Properly handle secure dispatch
Rebased on vma bind/unbind conversion
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
drm/i915: reduce vm->insert_entries() usage
FKA: drm/i915: eliminate vm->insert_entries()
With bind/unbind function pointers in place, we no longer need
insert_entries. We could, and want, to remove clear_range, however it's
not totally easy at this point. Since it's used in a couple of places
still that don't only deal in objects: setup, ppgtt init, and restore
gtt mappings.
v2: Don't actually remove insert_entries, just limit its usage. It will
be useful when we introduce gen8. It will always be called from the vma
bind/unbind.
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> (v1)
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2013-12-07 06:10:56 +08:00
|
|
|
if (!vma)
|
2015-03-16 20:11:13 +08:00
|
|
|
vma = __i915_gem_vma_create(obj, ggtt, view);
|
drm/i915: Create bind/unbind abstraction for VMAs
To sum up what goes on here, we abstract the vma binding, similarly to
the previous object binding. This helps for distinguishing legacy
binding, versus modern binding. To keep the code churn as minimal as
possible, I am leaving in insert_entries(). It serves as the per
platform pte writing basically. bind_vma and insert_entries do share a
lot of similarities, and I did have designs to combine the two, but as
mentioned already... too much churn in an already massive patchset.
What follows are the 3 commits which existed discretely in the original
submissions. Upon rebasing on Broadwell support, it became clear that
separation was not good, and only made for more error prone code. Below
are the 3 commit messages with all their history.
drm/i915: Add bind/unbind object functions to VMA
drm/i915: Use the new vm [un]bind functions
drm/i915: reduce vm->insert_entries() usage
drm/i915: Add bind/unbind object functions to VMA
As we plumb the code with more VM information, it has become more
obvious that the easiest way to deal with bind and unbind is to simply
put the function pointers in the vm, and let those choose the correct
way to handle the page table updates. This change allows many places in
the code to simply be vm->bind, and not have to worry about
distinguishing PPGTT vs GGTT.
Notice that this patch has no impact on functionality. I've decided to
save the actual change until the next patch because I think it's easier
to review that way. I'm happy to squash the two, or let Daniel do it on
merge.
v2:
Make ggtt handle the quirky aliasing ppgtt
Add flags to bind object to support above
Don't ever call bind/unbind directly for PPGTT until we have real, full
PPGTT (use NULLs to assert this)
Make sure we rebind the ggtt if there already is a ggtt binding. This
happens on set cache levels.
Use VMA for bind/unbind (Daniel, Ben)
v3: Reorganize ggtt_vma_bind to be more concise and easier to read
(Ville). Change logic in unbind to only unbind ggtt when there is a
global mapping, and to remove a redundant check if the aliasing ppgtt
exists.
v4: Make the bind function a bit smarter about the cache levels to avoid
unnecessary multiple remaps. "I accept it is a wart, I think unifying
the pin_vma / bind_vma could be unified later" (Chris)
Removed the git notes, and put version info here. (Daniel)
v5: Update the comment to not suck (Chris)
v6:
Move bind/unbind to the VMA. It makes more sense in the VMA structure
(always has, but I was previously lazy). With this change, it will allow
us to keep a distinct insert_entries.
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
drm/i915: Use the new vm [un]bind functions
Building on the last patch which created the new function pointers in
the VM for bind/unbind, here we actually put those new function pointers
to use.
Split out as a separate patch to aid in review. I'm fine with squashing
into the previous patch if people request it.
v2: Updated to address the smart ggtt which can do aliasing as needed
Make sure we bind to global gtt when mappable and fenceable. I thought
we could get away without this initially, but we cannot.
v3: Make the global GTT binding explicitly use the ggtt VM for
bind_vma(). While at it, use the new ggtt_vma helper (Chris)
At this point the original mailing list thread diverges. ie.
v4^:
use target_obj instead of obj for gen6 relocate_entry
vma->bind_vma() can be called safely during pin. So simply do that
instead of the complicated conditionals.
Don't restore PPGTT bound objects on resume path
Bug fix in resume path for globally bound Bos
Properly handle secure dispatch
Rebased on vma bind/unbind conversion
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
drm/i915: reduce vm->insert_entries() usage
FKA: drm/i915: eliminate vm->insert_entries()
With bind/unbind function pointers in place, we no longer need
insert_entries. We could, and want, to remove clear_range, however it's
not totally easy at this point. Since it's used in a couple of places
still that don't only deal in objects: setup, ppgtt init, and restore
gtt mappings.
v2: Don't actually remove insert_entries, just limit its usage. It will
be useful when we introduce gen8. It will always be called from the vma
bind/unbind.
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> (v1)
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2013-12-07 06:10:56 +08:00
|
|
|
|
|
|
|
return vma;
|
2015-03-16 20:11:13 +08:00
|
|
|
|
drm/i915: Create bind/unbind abstraction for VMAs
To sum up what goes on here, we abstract the vma binding, similarly to
the previous object binding. This helps for distinguishing legacy
binding, versus modern binding. To keep the code churn as minimal as
possible, I am leaving in insert_entries(). It serves as the per
platform pte writing basically. bind_vma and insert_entries do share a
lot of similarities, and I did have designs to combine the two, but as
mentioned already... too much churn in an already massive patchset.
What follows are the 3 commits which existed discretely in the original
submissions. Upon rebasing on Broadwell support, it became clear that
separation was not good, and only made for more error prone code. Below
are the 3 commit messages with all their history.
drm/i915: Add bind/unbind object functions to VMA
drm/i915: Use the new vm [un]bind functions
drm/i915: reduce vm->insert_entries() usage
drm/i915: Add bind/unbind object functions to VMA
As we plumb the code with more VM information, it has become more
obvious that the easiest way to deal with bind and unbind is to simply
put the function pointers in the vm, and let those choose the correct
way to handle the page table updates. This change allows many places in
the code to simply be vm->bind, and not have to worry about
distinguishing PPGTT vs GGTT.
Notice that this patch has no impact on functionality. I've decided to
save the actual change until the next patch because I think it's easier
to review that way. I'm happy to squash the two, or let Daniel do it on
merge.
v2:
Make ggtt handle the quirky aliasing ppgtt
Add flags to bind object to support above
Don't ever call bind/unbind directly for PPGTT until we have real, full
PPGTT (use NULLs to assert this)
Make sure we rebind the ggtt if there already is a ggtt binding. This
happens on set cache levels.
Use VMA for bind/unbind (Daniel, Ben)
v3: Reorganize ggtt_vma_bind to be more concise and easier to read
(Ville). Change logic in unbind to only unbind ggtt when there is a
global mapping, and to remove a redundant check if the aliasing ppgtt
exists.
v4: Make the bind function a bit smarter about the cache levels to avoid
unnecessary multiple remaps. "I accept it is a wart, I think unifying
the pin_vma / bind_vma could be unified later" (Chris)
Removed the git notes, and put version info here. (Daniel)
v5: Update the comment to not suck (Chris)
v6:
Move bind/unbind to the VMA. It makes more sense in the VMA structure
(always has, but I was previously lazy). With this change, it will allow
us to keep a distinct insert_entries.
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
drm/i915: Use the new vm [un]bind functions
Building on the last patch which created the new function pointers in
the VM for bind/unbind, here we actually put those new function pointers
to use.
Split out as a separate patch to aid in review. I'm fine with squashing
into the previous patch if people request it.
v2: Updated to address the smart ggtt which can do aliasing as needed
Make sure we bind to global gtt when mappable and fenceable. I thought
we could get away without this initially, but we cannot.
v3: Make the global GTT binding explicitly use the ggtt VM for
bind_vma(). While at it, use the new ggtt_vma helper (Chris)
At this point the original mailing list thread diverges. ie.
v4^:
use target_obj instead of obj for gen6 relocate_entry
vma->bind_vma() can be called safely during pin. So simply do that
instead of the complicated conditionals.
Don't restore PPGTT bound objects on resume path
Bug fix in resume path for globally bound Bos
Properly handle secure dispatch
Rebased on vma bind/unbind conversion
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
drm/i915: reduce vm->insert_entries() usage
FKA: drm/i915: eliminate vm->insert_entries()
With bind/unbind function pointers in place, we no longer need
insert_entries. We could, and want, to remove clear_range, however it's
not totally easy at this point. Since it's used in a couple of places
still that don't only deal in objects: setup, ppgtt init, and restore
gtt mappings.
v2: Don't actually remove insert_entries, just limit its usage. It will
be useful when we introduce gen8. It will always be called from the vma
bind/unbind.
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> (v1)
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2013-12-07 06:10:56 +08:00
|
|
|
}
|
2014-12-11 01:27:58 +08:00
|
|
|
|
2015-03-23 19:10:36 +08:00
|
|
|
static void
|
|
|
|
rotate_pages(dma_addr_t *in, unsigned int width, unsigned int height,
|
|
|
|
struct sg_table *st)
|
|
|
|
{
|
|
|
|
unsigned int column, row;
|
|
|
|
unsigned int src_idx;
|
|
|
|
struct scatterlist *sg = st->sgl;
|
|
|
|
|
|
|
|
st->nents = 0;
|
|
|
|
|
|
|
|
for (column = 0; column < width; column++) {
|
|
|
|
src_idx = width * (height - 1) + column;
|
|
|
|
for (row = 0; row < height; row++) {
|
|
|
|
st->nents++;
|
|
|
|
/* We don't need the pages, but need to initialize
|
|
|
|
* the entries so the sg list can be happily traversed.
|
|
|
|
* The only thing we need are DMA addresses.
|
|
|
|
*/
|
|
|
|
sg_set_page(sg, NULL, PAGE_SIZE, 0);
|
|
|
|
sg_dma_address(sg) = in[src_idx];
|
|
|
|
sg_dma_len(sg) = PAGE_SIZE;
|
|
|
|
sg = sg_next(sg);
|
|
|
|
src_idx -= width;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static struct sg_table *
|
|
|
|
intel_rotate_fb_obj_pages(struct i915_ggtt_view *ggtt_view,
|
|
|
|
struct drm_i915_gem_object *obj)
|
|
|
|
{
|
|
|
|
struct drm_device *dev = obj->base.dev;
|
|
|
|
struct intel_rotation_info *rot_info = &ggtt_view->rotation_info;
|
|
|
|
unsigned long size, pages, rot_pages;
|
|
|
|
struct sg_page_iter sg_iter;
|
|
|
|
unsigned long i;
|
|
|
|
dma_addr_t *page_addr_list;
|
|
|
|
struct sg_table *st;
|
|
|
|
unsigned int tile_pitch, tile_height;
|
|
|
|
unsigned int width_pages, height_pages;
|
2015-03-25 18:15:26 +08:00
|
|
|
int ret = -ENOMEM;
|
2015-03-23 19:10:36 +08:00
|
|
|
|
|
|
|
pages = obj->base.size / PAGE_SIZE;
|
|
|
|
|
|
|
|
/* Calculate tiling geometry. */
|
|
|
|
tile_height = intel_tile_height(dev, rot_info->pixel_format,
|
|
|
|
rot_info->fb_modifier);
|
|
|
|
tile_pitch = PAGE_SIZE / tile_height;
|
|
|
|
width_pages = DIV_ROUND_UP(rot_info->pitch, tile_pitch);
|
|
|
|
height_pages = DIV_ROUND_UP(rot_info->height, tile_height);
|
|
|
|
rot_pages = width_pages * height_pages;
|
|
|
|
size = rot_pages * PAGE_SIZE;
|
|
|
|
|
|
|
|
/* Allocate a temporary list of source pages for random access. */
|
|
|
|
page_addr_list = drm_malloc_ab(pages, sizeof(dma_addr_t));
|
|
|
|
if (!page_addr_list)
|
|
|
|
return ERR_PTR(ret);
|
|
|
|
|
|
|
|
/* Allocate target SG list. */
|
|
|
|
st = kmalloc(sizeof(*st), GFP_KERNEL);
|
|
|
|
if (!st)
|
|
|
|
goto err_st_alloc;
|
|
|
|
|
|
|
|
ret = sg_alloc_table(st, rot_pages, GFP_KERNEL);
|
|
|
|
if (ret)
|
|
|
|
goto err_sg_alloc;
|
|
|
|
|
|
|
|
/* Populate source page list from the object. */
|
|
|
|
i = 0;
|
|
|
|
for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, 0) {
|
|
|
|
page_addr_list[i] = sg_page_iter_dma_address(&sg_iter);
|
|
|
|
i++;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Rotate the pages. */
|
|
|
|
rotate_pages(page_addr_list, width_pages, height_pages, st);
|
|
|
|
|
|
|
|
DRM_DEBUG_KMS(
|
|
|
|
"Created rotated page mapping for object size %lu (pitch=%u, height=%u, pixel_format=0x%x, %ux%u tiles, %lu pages).\n",
|
|
|
|
size, rot_info->pitch, rot_info->height,
|
|
|
|
rot_info->pixel_format, width_pages, height_pages,
|
|
|
|
rot_pages);
|
|
|
|
|
|
|
|
drm_free_large(page_addr_list);
|
|
|
|
|
|
|
|
return st;
|
|
|
|
|
|
|
|
err_sg_alloc:
|
|
|
|
kfree(st);
|
|
|
|
err_st_alloc:
|
|
|
|
drm_free_large(page_addr_list);
|
|
|
|
|
|
|
|
DRM_DEBUG_KMS(
|
|
|
|
"Failed to create rotated mapping for object size %lu! (%d) (pitch=%u, height=%u, pixel_format=0x%x, %ux%u tiles, %lu pages)\n",
|
|
|
|
size, ret, rot_info->pitch, rot_info->height,
|
|
|
|
rot_info->pixel_format, width_pages, height_pages,
|
|
|
|
rot_pages);
|
|
|
|
return ERR_PTR(ret);
|
|
|
|
}
|
2015-03-16 20:11:13 +08:00
|
|
|
|
2015-05-06 19:35:38 +08:00
|
|
|
static struct sg_table *
|
|
|
|
intel_partial_pages(const struct i915_ggtt_view *view,
|
|
|
|
struct drm_i915_gem_object *obj)
|
|
|
|
{
|
|
|
|
struct sg_table *st;
|
|
|
|
struct scatterlist *sg;
|
|
|
|
struct sg_page_iter obj_sg_iter;
|
|
|
|
int ret = -ENOMEM;
|
|
|
|
|
|
|
|
st = kmalloc(sizeof(*st), GFP_KERNEL);
|
|
|
|
if (!st)
|
|
|
|
goto err_st_alloc;
|
|
|
|
|
|
|
|
ret = sg_alloc_table(st, view->params.partial.size, GFP_KERNEL);
|
|
|
|
if (ret)
|
|
|
|
goto err_sg_alloc;
|
|
|
|
|
|
|
|
sg = st->sgl;
|
|
|
|
st->nents = 0;
|
|
|
|
for_each_sg_page(obj->pages->sgl, &obj_sg_iter, obj->pages->nents,
|
|
|
|
view->params.partial.offset)
|
|
|
|
{
|
|
|
|
if (st->nents >= view->params.partial.size)
|
|
|
|
break;
|
|
|
|
|
|
|
|
sg_set_page(sg, NULL, PAGE_SIZE, 0);
|
|
|
|
sg_dma_address(sg) = sg_page_iter_dma_address(&obj_sg_iter);
|
|
|
|
sg_dma_len(sg) = PAGE_SIZE;
|
|
|
|
|
|
|
|
sg = sg_next(sg);
|
|
|
|
st->nents++;
|
|
|
|
}
|
|
|
|
|
|
|
|
return st;
|
|
|
|
|
|
|
|
err_sg_alloc:
|
|
|
|
kfree(st);
|
|
|
|
err_st_alloc:
|
|
|
|
return ERR_PTR(ret);
|
|
|
|
}
|
|
|
|
|
2015-04-14 23:35:27 +08:00
|
|
|
static int
|
2015-03-23 19:10:36 +08:00
|
|
|
i915_get_ggtt_vma_pages(struct i915_vma *vma)
|
2014-12-11 01:27:58 +08:00
|
|
|
{
|
2015-03-23 19:10:36 +08:00
|
|
|
int ret = 0;
|
|
|
|
|
2014-12-11 01:27:58 +08:00
|
|
|
if (vma->ggtt_view.pages)
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
if (vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL)
|
|
|
|
vma->ggtt_view.pages = vma->obj->pages;
|
2015-03-23 19:10:36 +08:00
|
|
|
else if (vma->ggtt_view.type == I915_GGTT_VIEW_ROTATED)
|
|
|
|
vma->ggtt_view.pages =
|
|
|
|
intel_rotate_fb_obj_pages(&vma->ggtt_view, vma->obj);
|
2015-05-06 19:35:38 +08:00
|
|
|
else if (vma->ggtt_view.type == I915_GGTT_VIEW_PARTIAL)
|
|
|
|
vma->ggtt_view.pages =
|
|
|
|
intel_partial_pages(&vma->ggtt_view, vma->obj);
|
2014-12-11 01:27:58 +08:00
|
|
|
else
|
|
|
|
WARN_ONCE(1, "GGTT view %u not implemented!\n",
|
|
|
|
vma->ggtt_view.type);
|
|
|
|
|
|
|
|
if (!vma->ggtt_view.pages) {
|
2015-03-16 20:11:13 +08:00
|
|
|
DRM_ERROR("Failed to get pages for GGTT view type %u!\n",
|
2014-12-11 01:27:58 +08:00
|
|
|
vma->ggtt_view.type);
|
2015-03-23 19:10:36 +08:00
|
|
|
ret = -EINVAL;
|
|
|
|
} else if (IS_ERR(vma->ggtt_view.pages)) {
|
|
|
|
ret = PTR_ERR(vma->ggtt_view.pages);
|
|
|
|
vma->ggtt_view.pages = NULL;
|
|
|
|
DRM_ERROR("Failed to get pages for VMA view type %u (%d)!\n",
|
|
|
|
vma->ggtt_view.type, ret);
|
2014-12-11 01:27:58 +08:00
|
|
|
}
|
|
|
|
|
2015-03-23 19:10:36 +08:00
|
|
|
return ret;
|
2014-12-11 01:27:58 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* i915_vma_bind - Sets up PTEs for an VMA in it's corresponding address space.
|
|
|
|
* @vma: VMA to map
|
|
|
|
* @cache_level: mapping cache level
|
|
|
|
* @flags: flags like global or local mapping
|
|
|
|
*
|
|
|
|
* DMA addresses are taken from the scatter-gather table of this object (or of
|
|
|
|
* this VMA in case of non-default GGTT views) and PTE entries set up.
|
|
|
|
* Note that DMA addresses are also the only part of the SG table we care about.
|
|
|
|
*/
|
|
|
|
int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level,
|
|
|
|
u32 flags)
|
|
|
|
{
|
2015-04-28 22:56:17 +08:00
|
|
|
int ret;
|
|
|
|
u32 bind_flags;
|
2015-04-10 20:54:58 +08:00
|
|
|
|
2015-04-28 22:56:17 +08:00
|
|
|
if (WARN_ON(flags == 0))
|
|
|
|
return -EINVAL;
|
2015-04-10 20:54:58 +08:00
|
|
|
|
2015-04-28 22:56:17 +08:00
|
|
|
bind_flags = 0;
|
2015-04-21 00:04:05 +08:00
|
|
|
if (flags & PIN_GLOBAL)
|
|
|
|
bind_flags |= GLOBAL_BIND;
|
|
|
|
if (flags & PIN_USER)
|
|
|
|
bind_flags |= LOCAL_BIND;
|
|
|
|
|
|
|
|
if (flags & PIN_UPDATE)
|
|
|
|
bind_flags |= vma->bound;
|
|
|
|
else
|
|
|
|
bind_flags &= ~vma->bound;
|
|
|
|
|
2015-04-28 22:56:17 +08:00
|
|
|
if (bind_flags == 0)
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
if (vma->bound == 0 && vma->vm->allocate_va_range) {
|
|
|
|
trace_i915_va_alloc(vma->vm,
|
|
|
|
vma->node.start,
|
|
|
|
vma->node.size,
|
|
|
|
VM_TO_TRACE_NAME(vma->vm));
|
|
|
|
|
|
|
|
ret = vma->vm->allocate_va_range(vma->vm,
|
|
|
|
vma->node.start,
|
|
|
|
vma->node.size);
|
|
|
|
if (ret)
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
ret = vma->vm->bind_vma(vma, cache_level, bind_flags);
|
2015-04-14 23:35:27 +08:00
|
|
|
if (ret)
|
|
|
|
return ret;
|
2015-04-21 00:04:05 +08:00
|
|
|
|
|
|
|
vma->bound |= bind_flags;
|
2014-12-11 01:27:58 +08:00
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
2015-05-06 19:33:58 +08:00
|
|
|
|
|
|
|
/**
|
|
|
|
* i915_ggtt_view_size - Get the size of a GGTT view.
|
|
|
|
* @obj: Object the view is of.
|
|
|
|
* @view: The view in question.
|
|
|
|
*
|
|
|
|
* @return The size of the GGTT view in bytes.
|
|
|
|
*/
|
|
|
|
size_t
|
|
|
|
i915_ggtt_view_size(struct drm_i915_gem_object *obj,
|
|
|
|
const struct i915_ggtt_view *view)
|
|
|
|
{
|
|
|
|
if (view->type == I915_GGTT_VIEW_NORMAL ||
|
|
|
|
view->type == I915_GGTT_VIEW_ROTATED) {
|
|
|
|
return obj->base.size;
|
2015-05-06 19:35:38 +08:00
|
|
|
} else if (view->type == I915_GGTT_VIEW_PARTIAL) {
|
|
|
|
return view->params.partial.size << PAGE_SHIFT;
|
2015-05-06 19:33:58 +08:00
|
|
|
} else {
|
|
|
|
WARN_ONCE(1, "GGTT view %u not implemented!\n", view->type);
|
|
|
|
return obj->base.size;
|
|
|
|
}
|
|
|
|
}
|