linux/drivers/gpu/drm/i915/i915_utils.h

193 lines
5.6 KiB
C
Raw Normal View History

/*
* Copyright © 2016 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*
*/
#ifndef __I915_UTILS_H
#define __I915_UTILS_H
#undef WARN_ON
/* Many gcc seem to no see through this and fall over :( */
#if 0
#define WARN_ON(x) ({ \
bool __i915_warn_cond = (x); \
if (__builtin_constant_p(__i915_warn_cond)) \
BUILD_BUG_ON(__i915_warn_cond); \
WARN(__i915_warn_cond, "WARN_ON(" #x ")"); })
#else
#define WARN_ON(x) WARN((x), "%s", "WARN_ON(" __stringify(x) ")")
#endif
#undef WARN_ON_ONCE
#define WARN_ON_ONCE(x) WARN_ONCE((x), "%s", "WARN_ON_ONCE(" __stringify(x) ")")
#define MISSING_CASE(x) WARN(1, "Missing case (%s == %ld)\n", \
__stringify(x), (long)(x))
#if defined(GCC_VERSION) && GCC_VERSION >= 70000
#define add_overflows_t(T, A, B) \
__builtin_add_overflow_p((A), (B), (T)0)
#else
#define add_overflows_t(T, A, B) ({ \
typeof(A) a = (A); \
typeof(B) b = (B); \
(T)(a + b) < a; \
})
#endif
#define add_overflows(A, B) \
add_overflows_t(typeof((A) + (B)), (A), (B))
#define range_overflows(start, size, max) ({ \
typeof(start) start__ = (start); \
typeof(size) size__ = (size); \
typeof(max) max__ = (max); \
(void)(&start__ == &size__); \
(void)(&start__ == &max__); \
start__ > max__ || size__ > max__ - start__; \
})
#define range_overflows_t(type, start, size, max) \
range_overflows((type)(start), (type)(size), (type)(max))
/* Note we don't consider signbits :| */
#define overflows_type(x, T) \
(sizeof(x) > sizeof(T) && (x) >> BITS_PER_TYPE(T))
#define ptr_mask_bits(ptr, n) ({ \
unsigned long __v = (unsigned long)(ptr); \
(typeof(ptr))(__v & -BIT(n)); \
})
#define ptr_unmask_bits(ptr, n) ((unsigned long)(ptr) & (BIT(n) - 1))
#define ptr_unpack_bits(ptr, bits, n) ({ \
unsigned long __v = (unsigned long)(ptr); \
*(bits) = __v & (BIT(n) - 1); \
(typeof(ptr))(__v & -BIT(n)); \
})
#define ptr_pack_bits(ptr, bits, n) ({ \
unsigned long __bits = (bits); \
GEM_BUG_ON(__bits & -BIT(n)); \
((typeof(ptr))((unsigned long)(ptr) | __bits)); \
})
#define page_mask_bits(ptr) ptr_mask_bits(ptr, PAGE_SHIFT)
#define page_unmask_bits(ptr) ptr_unmask_bits(ptr, PAGE_SHIFT)
#define page_pack_bits(ptr, bits) ptr_pack_bits(ptr, bits, PAGE_SHIFT)
#define page_unpack_bits(ptr, bits) ptr_unpack_bits(ptr, bits, PAGE_SHIFT)
#define ptr_offset(ptr, member) offsetof(typeof(*(ptr)), member)
#define fetch_and_zero(ptr) ({ \
typeof(*ptr) __T = *(ptr); \
*(ptr) = (typeof(*ptr))0; \
__T; \
})
drm/i915: Introduce the i915_user_extension_method An idea for extending uABI inspired by Vulkan's extension chains. Instead of expanding the data struct for each ioctl every time we need to add a new feature, define an extension chain instead. As we add optional interfaces to control the ioctl, we define a new extension struct that can be linked into the ioctl data only when required by the user. The key advantage being able to ignore large control structs for optional interfaces/extensions, while being able to process them in a consistent manner. In comparison to other extensible ioctls, the key difference is the use of a linked chain of extension structs vs an array of tagged pointers. For example, struct drm_amdgpu_cs_chunk { __u32 chunk_id; __u32 length_dw; __u64 chunk_data; }; struct drm_amdgpu_cs_in { __u32 ctx_id; __u32 bo_list_handle; __u32 num_chunks; __u32 _pad; __u64 chunks; }; allows userspace to pass in array of pointers to extension structs, but must therefore keep constructing that array along side the command stream. In dynamic situations like that, a linked list is preferred and does not similar from extra cache line misses as the extension structs themselves must still be loaded separate to the chunks array. v2: Apply the tail call optimisation directly to nip the worry of stack overflow in the bud. v3: Defend against recursion. v4: Fixup local types to match new uabi Opens: - do we include the result as an out-field in each chain? struct i915_user_extension { __u64 next_extension; __u64 name; __s32 result; __u32 mbz; /* reserved for future use */ }; * Undecided, so provision some room for future expansion. Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20190322092325.5883-1-chris@chris-wilson.co.uk
2019-03-22 17:23:22 +08:00
/*
* container_of_user: Extract the superclass from a pointer to a member.
*
* Exactly like container_of() with the exception that it plays nicely
* with sparse for __user @ptr.
*/
#define container_of_user(ptr, type, member) ({ \
void __user *__mptr = (void __user *)(ptr); \
BUILD_BUG_ON_MSG(!__same_type(*(ptr), ((type *)0)->member) && \
!__same_type(*(ptr), void), \
"pointer type mismatch in container_of()"); \
((type __user *)(__mptr - offsetof(type, member))); })
/*
* check_user_mbz: Check that a user value exists and is zero
*
* Frequently in our uABI we reserve space for future extensions, and
* two ensure that userspace is prepared we enforce that space must
* be zero. (Then any future extension can safely assume a default value
* of 0.)
*
* check_user_mbz() combines checking that the user pointer is accessible
* and that the contained value is zero.
*
* Returns: -EFAULT if not accessible, -EINVAL if !zero, or 0 on success.
*/
#define check_user_mbz(U) ({ \
typeof(*(U)) mbz__; \
get_user(mbz__, (U)) ? -EFAULT : mbz__ ? -EINVAL : 0; \
})
static inline u64 ptr_to_u64(const void *ptr)
{
return (uintptr_t)ptr;
}
#define u64_to_ptr(T, x) ({ \
typecheck(u64, x); \
(T *)(uintptr_t)(x); \
})
#define __mask_next_bit(mask) ({ \
int __idx = ffs(mask) - 1; \
mask &= ~BIT(__idx); \
__idx; \
})
drm/i915: Split execlist priority queue into rbtree + linked list All the requests at the same priority are executed in FIFO order. They do not need to be stored in the rbtree themselves, as they are a simple list within a level. If we move the requests at one priority into a list, we can then reduce the rbtree to the set of priorities. This should keep the height of the rbtree small, as the number of active priorities can not exceed the number of active requests and should be typically only a few. Currently, we have ~2k possible different priority levels, that may increase to allow even more fine grained selection. Allocating those in advance seems a waste (and may be impossible), so we opt for allocating upon first use, and freeing after its requests are depleted. To avoid the possibility of an allocation failure causing us to lose a request, we preallocate the default priority (0) and bump any request to that priority if we fail to allocate it the appropriate plist. Having a request (that is ready to run, so not leading to corruption) execute out-of-order is better than leaking the request (and its dependency tree) entirely. There should be a benefit to reducing execlists_dequeue() to principally using a simple list (and reducing the frequency of both rbtree iteration and balancing on erase) but for typical workloads, request coalescing should be small enough that we don't notice any change. The main gain is from improving PI calls to schedule, and the explicit list within a level should make request unwinding simpler (we just need to insert at the head of the list rather than the tail and not have to make the rbtree search more complicated). v2: Avoid use-after-free when deleting a depleted priolist v3: Michał found the solution to handling the allocation failure gracefully. If we disable all priority scheduling following the allocation failure, those requests will be executed in fifo and we will ensure that this request and its dependencies are in strict fifo (even when it doesn't realise it is only a single list). Normal scheduling is restored once we know the device is idle, until the next failure! Suggested-by: Michał Wajdeczko <michal.wajdeczko@intel.com> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: Michał Winiarski <michal.winiarski@intel.com> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> Reviewed-by: Michał Winiarski <michal.winiarski@intel.com> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Link: http://patchwork.freedesktop.org/patch/msgid/20170517121007.27224-8-chris@chris-wilson.co.uk
2017-05-17 20:10:03 +08:00
#include <linux/list.h>
static inline void __list_del_many(struct list_head *head,
struct list_head *first)
{
first->prev = head;
WRITE_ONCE(head->next, first);
}
/*
* Wait until the work is finally complete, even if it tries to postpone
* by requeueing itself. Note, that if the worker never cancels itself,
* we will spin forever.
*/
static inline void drain_delayed_work(struct delayed_work *dw)
{
do {
while (flush_delayed_work(dw))
;
} while (delayed_work_pending(dw));
}
static inline const char *yesno(bool v)
{
return v ? "yes" : "no";
}
static inline const char *onoff(bool v)
{
return v ? "on" : "off";
}
static inline const char *enableddisabled(bool v)
{
return v ? "enabled" : "disabled";
}
#endif /* !__I915_UTILS_H */