2009-06-05 20:42:42 +08:00
|
|
|
/*
|
|
|
|
* Copyright 2008 Advanced Micro Devices, Inc.
|
|
|
|
* Copyright 2008 Red Hat Inc.
|
|
|
|
* Copyright 2009 Jerome Glisse.
|
|
|
|
*
|
|
|
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
|
|
|
* copy of this software and associated documentation files (the "Software"),
|
|
|
|
* to deal in the Software without restriction, including without limitation
|
|
|
|
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
|
|
* and/or sell copies of the Software, and to permit persons to whom the
|
|
|
|
* Software is furnished to do so, subject to the following conditions:
|
|
|
|
*
|
|
|
|
* The above copyright notice and this permission notice shall be included in
|
|
|
|
* all copies or substantial portions of the Software.
|
|
|
|
*
|
|
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
|
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
|
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
|
|
|
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
|
|
|
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
|
|
|
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
|
|
|
* OTHER DEALINGS IN THE SOFTWARE.
|
|
|
|
*
|
|
|
|
* Authors: Dave Airlie
|
|
|
|
* Alex Deucher
|
|
|
|
* Jerome Glisse
|
|
|
|
*/
|
|
|
|
#ifndef __RADEON_H__
|
|
|
|
#define __RADEON_H__
|
|
|
|
|
|
|
|
/* TODO: Here are the things that need to be done:
|
|
|
|
* - surface allocator & initializer : (bit like scratch reg) should
|
|
|
|
* initialize HDP_ stuff on RS600, R600, R700 hw, well anything
|
|
|
|
* related to surface
|
|
|
|
* - WB : write back stuff (do it bit like scratch reg things)
|
|
|
|
* - Vblank : look at Jesse's rework and what we should do
|
|
|
|
* - r600/r700: gart & cp
|
|
|
|
* - cs : clean cs ioctl use bitmap & things like that.
|
|
|
|
* - power management stuff
|
|
|
|
* - Barrier in gart code
|
|
|
|
* - Unmappable vram ?
|
|
|
|
* - TESTING, TESTING, TESTING
|
|
|
|
*/
|
|
|
|
|
2009-09-29 00:34:43 +08:00
|
|
|
/* Initialization path:
|
|
|
|
* We expect that acceleration initialization might fail for various
|
|
|
|
* reasons even though we work hard to make it work on most
|
|
|
|
* configurations. In order to still have a working userspace in such
|
|
|
|
* situation the init path must succeed up to the memory controller
|
|
|
|
* initialization point. Failures before this point are considered
|
|
|
|
* fatal errors. Here is the init callchain:
|
|
|
|
* radeon_device_init perform common structure, mutex initialization
|
|
|
|
* asic_init setup the GPU memory layout and perform all
|
|
|
|
* one time initialization (failures in this
|
|
|
|
* function are considered fatal)
|
|
|
|
* asic_startup setup the GPU acceleration, in order to
|
|
|
|
* follow guideline the first thing this
|
|
|
|
* function should do is setting the GPU
|
|
|
|
* memory controller (only MC setup failures
|
|
|
|
* are considered fatal)
|
|
|
|
*/
|
|
|
|
|
2011-07-27 07:09:06 +08:00
|
|
|
#include <linux/atomic.h>
|
2009-06-05 20:42:42 +08:00
|
|
|
#include <linux/wait.h>
|
|
|
|
#include <linux/list.h>
|
|
|
|
#include <linux/kref.h>
|
|
|
|
|
2009-11-20 21:29:23 +08:00
|
|
|
#include <ttm/ttm_bo_api.h>
|
|
|
|
#include <ttm/ttm_bo_driver.h>
|
|
|
|
#include <ttm/ttm_placement.h>
|
|
|
|
#include <ttm/ttm_module.h>
|
2010-11-17 20:38:32 +08:00
|
|
|
#include <ttm/ttm_execbuf_util.h>
|
2009-11-20 21:29:23 +08:00
|
|
|
|
2009-09-22 06:50:10 +08:00
|
|
|
#include "radeon_family.h"
|
2009-06-05 20:42:42 +08:00
|
|
|
#include "radeon_mode.h"
|
|
|
|
#include "radeon_reg.h"
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Module parameters.
|
|
|
|
*/
|
|
|
|
extern int radeon_no_wb;
|
|
|
|
extern int radeon_modeset;
|
|
|
|
extern int radeon_dynclks;
|
|
|
|
extern int radeon_r4xx_atom;
|
|
|
|
extern int radeon_agpmode;
|
|
|
|
extern int radeon_vram_limit;
|
|
|
|
extern int radeon_gart_size;
|
|
|
|
extern int radeon_benchmarking;
|
2009-07-21 17:23:57 +08:00
|
|
|
extern int radeon_testing;
|
2009-06-05 20:42:42 +08:00
|
|
|
extern int radeon_connector_table;
|
2009-08-13 14:32:14 +08:00
|
|
|
extern int radeon_tv;
|
2009-10-12 05:49:13 +08:00
|
|
|
extern int radeon_audio;
|
2010-03-31 12:33:27 +08:00
|
|
|
extern int radeon_disp_priority;
|
2010-03-17 14:07:37 +08:00
|
|
|
extern int radeon_hw_i2c;
|
2011-01-13 09:05:11 +08:00
|
|
|
extern int radeon_pcie_gen2;
|
2011-11-02 02:20:30 +08:00
|
|
|
extern int radeon_msi;
|
2012-05-02 21:11:21 +08:00
|
|
|
extern int radeon_lockup_timeout;
|
2009-06-05 20:42:42 +08:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Copied from radeon_drv.h so we don't have to include both and get conflicting
|
|
|
|
* symbols.
|
|
|
|
*/
|
2012-05-09 21:34:46 +08:00
|
|
|
#define RADEON_MAX_USEC_TIMEOUT 100000 /* 100 ms */
|
|
|
|
#define RADEON_FENCE_JIFFIES_TIMEOUT (HZ / 2)
|
2010-02-16 04:36:13 +08:00
|
|
|
/* RADEON_IB_POOL_SIZE must be a power of 2 */
|
2012-05-09 21:34:46 +08:00
|
|
|
#define RADEON_IB_POOL_SIZE 16
|
|
|
|
#define RADEON_DEBUGFS_MAX_COMPONENTS 32
|
|
|
|
#define RADEONFB_CONN_LIMIT 4
|
|
|
|
#define RADEON_BIOS_NUM_SCRATCH 8
|
2009-06-05 20:42:42 +08:00
|
|
|
|
2011-11-18 09:13:28 +08:00
|
|
|
/* max number of rings */
|
2012-12-05 04:27:33 +08:00
|
|
|
#define RADEON_NUM_RINGS 5
|
2012-05-09 21:34:46 +08:00
|
|
|
|
|
|
|
/* a fence's seq is set to this number when signaled */
|
|
|
|
#define RADEON_FENCE_SIGNALED_SEQ 0LL
|
2011-11-18 09:13:28 +08:00
|
|
|
|
|
|
|
/* internal ring indices */
|
|
|
|
/* r1xx+ has gfx CP ring */
|
2012-05-09 21:34:46 +08:00
|
|
|
#define RADEON_RING_TYPE_GFX_INDEX 0
|
2011-11-18 09:13:28 +08:00
|
|
|
|
|
|
|
/* cayman has 2 compute CP rings */
|
2012-05-09 21:34:46 +08:00
|
|
|
#define CAYMAN_RING_TYPE_CP1_INDEX 1
|
|
|
|
#define CAYMAN_RING_TYPE_CP2_INDEX 2
|
2011-11-18 09:13:28 +08:00
|
|
|
|
2012-09-28 03:08:35 +08:00
|
|
|
/* R600+ has an async dma ring */
|
|
|
|
#define R600_RING_TYPE_DMA_INDEX 3
|
2012-12-05 04:27:33 +08:00
|
|
|
/* cayman adds a second async dma ring */
|
|
|
|
#define CAYMAN_RING_TYPE_DMA1_INDEX 4
|
2012-09-28 03:08:35 +08:00
|
|
|
|
drm/radeon: GPU virtual memory support v22
Virtual address space are per drm client (opener of /dev/drm).
Client are in charge of virtual address space, they need to
map bo into it by calling DRM_RADEON_GEM_VA ioctl.
First 16M of virtual address space is reserved by the kernel.
Once using 2 level page table we should be able to have a small
vram memory footprint for each pt (there would be one pt for all
gart, one for all vram and then one first level for each virtual
address space).
Plan include using the sub allocator for a common vm page table
area and using memcpy to copy vm page table in & out. Or use
a gart object and copy things in & out using dma.
v2: agd5f fixes:
- Add vram base offset for vram pages. The GPU physical address of a
vram page is FB_OFFSET + page offset. FB_OFFSET is 0 on discrete
cards and the physical bus address of the stolen memory on
integrated chips.
- VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR covers all vmid's >= 1
v3: agd5f:
- integrate with the semaphore/multi-ring stuff
v4:
- rebase on top ttm dma & multi-ring stuff
- userspace is now in charge of the address space
- no more specific cs vm ioctl, instead cs ioctl has a new
chunk
v5:
- properly handle mem == NULL case from move_notify callback
- fix the vm cleanup path
v6:
- fix update of page table to only happen on valid mem placement
v7:
- add tlb flush for each vm context
- add flags to define mapping property (readable, writeable, snooped)
- make ring id implicit from ib->fence->ring, up to each asic callback
to then do ring specific scheduling if vm ib scheduling function
v8:
- add query for ib limit and kernel reserved virtual space
- rename vm->size to max_pfn (maximum number of page)
- update gem_va ioctl to also allow unmap operation
- bump kernel version to allow userspace to query for vm support
v9:
- rebuild page table only when bind and incrementaly depending
on bo referenced by cs and that have been moved
- allow virtual address space to grow
- use sa allocator for vram page table
- return invalid when querying vm limit on non cayman GPU
- dump vm fault register on lockup
v10: agd5f:
- Move the vm schedule_ib callback to a standalone function, remove
the callback and use the existing ib_execute callback for VM IBs.
v11:
- rebase on top of lastest Linus
v12: agd5f:
- remove spurious backslash
- set IB vm_id to 0 in radeon_ib_get()
v13: agd5f:
- fix handling of RADEON_CHUNK_ID_FLAGS
v14:
- fix va destruction
- fix suspend resume
- forbid bo to have several different va in same vm
v15:
- rebase
v16:
- cleanup left over of vm init/fini
v17: agd5f:
- cs checker
v18: agd5f:
- reworks the CS ioctl to better support multiple rings and
VM. Rather than adding a new chunk id for VM, just re-use the
IB chunk id and add a new flags for VM mode. Also define additional
dwords for the flags chunk id to define the what ring we want to use
(gfx, compute, uvd, etc.) and the priority.
v19:
- fix cs fini in weird case of no ib
- semi working flush fix for ni
- rebase on top of sa allocator changes
v20: agd5f:
- further CS ioctl cleanups from Christian's comments
v21: agd5f:
- integrate CS checker improvements
v22: agd5f:
- final cleanups for release, only allow VM CS on cayman
Signed-off-by: Jerome Glisse <jglisse@redhat.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
2012-01-06 11:11:05 +08:00
|
|
|
/* hardcode these limits for now */
|
2012-09-11 22:09:59 +08:00
|
|
|
#define RADEON_VA_IB_OFFSET (1 << 20)
|
2012-05-09 21:34:46 +08:00
|
|
|
#define RADEON_VA_RESERVED_SIZE (8 << 20)
|
|
|
|
#define RADEON_IB_VM_MAX_SIZE (64 << 10)
|
drm/radeon: GPU virtual memory support v22
Virtual address space are per drm client (opener of /dev/drm).
Client are in charge of virtual address space, they need to
map bo into it by calling DRM_RADEON_GEM_VA ioctl.
First 16M of virtual address space is reserved by the kernel.
Once using 2 level page table we should be able to have a small
vram memory footprint for each pt (there would be one pt for all
gart, one for all vram and then one first level for each virtual
address space).
Plan include using the sub allocator for a common vm page table
area and using memcpy to copy vm page table in & out. Or use
a gart object and copy things in & out using dma.
v2: agd5f fixes:
- Add vram base offset for vram pages. The GPU physical address of a
vram page is FB_OFFSET + page offset. FB_OFFSET is 0 on discrete
cards and the physical bus address of the stolen memory on
integrated chips.
- VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR covers all vmid's >= 1
v3: agd5f:
- integrate with the semaphore/multi-ring stuff
v4:
- rebase on top ttm dma & multi-ring stuff
- userspace is now in charge of the address space
- no more specific cs vm ioctl, instead cs ioctl has a new
chunk
v5:
- properly handle mem == NULL case from move_notify callback
- fix the vm cleanup path
v6:
- fix update of page table to only happen on valid mem placement
v7:
- add tlb flush for each vm context
- add flags to define mapping property (readable, writeable, snooped)
- make ring id implicit from ib->fence->ring, up to each asic callback
to then do ring specific scheduling if vm ib scheduling function
v8:
- add query for ib limit and kernel reserved virtual space
- rename vm->size to max_pfn (maximum number of page)
- update gem_va ioctl to also allow unmap operation
- bump kernel version to allow userspace to query for vm support
v9:
- rebuild page table only when bind and incrementaly depending
on bo referenced by cs and that have been moved
- allow virtual address space to grow
- use sa allocator for vram page table
- return invalid when querying vm limit on non cayman GPU
- dump vm fault register on lockup
v10: agd5f:
- Move the vm schedule_ib callback to a standalone function, remove
the callback and use the existing ib_execute callback for VM IBs.
v11:
- rebase on top of lastest Linus
v12: agd5f:
- remove spurious backslash
- set IB vm_id to 0 in radeon_ib_get()
v13: agd5f:
- fix handling of RADEON_CHUNK_ID_FLAGS
v14:
- fix va destruction
- fix suspend resume
- forbid bo to have several different va in same vm
v15:
- rebase
v16:
- cleanup left over of vm init/fini
v17: agd5f:
- cs checker
v18: agd5f:
- reworks the CS ioctl to better support multiple rings and
VM. Rather than adding a new chunk id for VM, just re-use the
IB chunk id and add a new flags for VM mode. Also define additional
dwords for the flags chunk id to define the what ring we want to use
(gfx, compute, uvd, etc.) and the priority.
v19:
- fix cs fini in weird case of no ib
- semi working flush fix for ni
- rebase on top of sa allocator changes
v20: agd5f:
- further CS ioctl cleanups from Christian's comments
v21: agd5f:
- integrate CS checker improvements
v22: agd5f:
- final cleanups for release, only allow VM CS on cayman
Signed-off-by: Jerome Glisse <jglisse@redhat.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
2012-01-06 11:11:05 +08:00
|
|
|
|
2013-01-04 01:07:30 +08:00
|
|
|
/* reset flags */
|
|
|
|
#define RADEON_RESET_GFX (1 << 0)
|
|
|
|
#define RADEON_RESET_COMPUTE (1 << 1)
|
|
|
|
#define RADEON_RESET_DMA (1 << 2)
|
|
|
|
|
2009-06-05 20:42:42 +08:00
|
|
|
/*
|
|
|
|
* Errata workarounds.
|
|
|
|
*/
|
|
|
|
/*
 * PLL errata identifiers.
 *
 * Each enumerator has a distinct single-bit value (0x1, 0x2, 0x4), so the
 * values can be combined into and tested against a bitmask.
 */
enum radeon_pll_errata {
	CHIP_ERRATA_R300_CG		= 0x00000001,
	CHIP_ERRATA_PLL_DUMMYREADS	= 0x00000002,
	CHIP_ERRATA_PLL_DELAY		= 0x00000004
};
|
|
|
|
|
|
|
|
|
|
|
|
struct radeon_device;
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
* BIOS.
|
|
|
|
*/
|
|
|
|
bool radeon_get_bios(struct radeon_device *rdev);
|
|
|
|
|
|
|
|
/*
|
2009-09-08 08:10:24 +08:00
|
|
|
* Dummy page
|
2009-06-05 20:42:42 +08:00
|
|
|
*/
|
2009-09-08 08:10:24 +08:00
|
|
|
struct radeon_dummy_page {
|
|
|
|
struct page *page;
|
|
|
|
dma_addr_t addr;
|
|
|
|
};
|
|
|
|
int radeon_dummy_page_init(struct radeon_device *rdev);
|
|
|
|
void radeon_dummy_page_fini(struct radeon_device *rdev);
|
|
|
|
|
2009-06-05 20:42:42 +08:00
|
|
|
|
2009-09-08 08:10:24 +08:00
|
|
|
/*
|
|
|
|
* Clocks
|
|
|
|
*/
|
2009-06-05 20:42:42 +08:00
|
|
|
struct radeon_clock {
|
|
|
|
struct radeon_pll p1pll;
|
|
|
|
struct radeon_pll p2pll;
|
2010-01-13 06:54:34 +08:00
|
|
|
struct radeon_pll dcpll;
|
2009-06-05 20:42:42 +08:00
|
|
|
struct radeon_pll spll;
|
|
|
|
struct radeon_pll mpll;
|
|
|
|
/* 10 Khz units */
|
|
|
|
uint32_t default_mclk;
|
|
|
|
uint32_t default_sclk;
|
2010-01-13 06:54:34 +08:00
|
|
|
uint32_t default_dispclk;
|
|
|
|
uint32_t dp_extclk;
|
2011-06-09 01:01:11 +08:00
|
|
|
uint32_t max_pixel_clock;
|
2009-06-05 20:42:42 +08:00
|
|
|
};
|
|
|
|
|
2009-11-03 07:53:02 +08:00
|
|
|
/*
|
|
|
|
* Power management
|
|
|
|
*/
|
|
|
|
int radeon_pm_init(struct radeon_device *rdev);
|
2010-03-11 23:01:17 +08:00
|
|
|
void radeon_pm_fini(struct radeon_device *rdev);
|
2009-12-23 06:02:16 +08:00
|
|
|
void radeon_pm_compute_clocks(struct radeon_device *rdev);
|
2010-05-08 03:10:16 +08:00
|
|
|
void radeon_pm_suspend(struct radeon_device *rdev);
|
|
|
|
void radeon_pm_resume(struct radeon_device *rdev);
|
2009-12-29 02:58:44 +08:00
|
|
|
void radeon_combios_get_power_modes(struct radeon_device *rdev);
|
|
|
|
void radeon_atombios_get_power_modes(struct radeon_device *rdev);
|
2011-04-13 02:49:23 +08:00
|
|
|
void radeon_atom_set_voltage(struct radeon_device *rdev, u16 voltage_level, u8 voltage_type);
|
2010-07-01 00:02:03 +08:00
|
|
|
void rs690_pm_info(struct radeon_device *rdev);
|
2011-02-02 05:12:34 +08:00
|
|
|
extern int rv6xx_get_temp(struct radeon_device *rdev);
|
|
|
|
extern int rv770_get_temp(struct radeon_device *rdev);
|
|
|
|
extern int evergreen_get_temp(struct radeon_device *rdev);
|
|
|
|
extern int sumo_get_temp(struct radeon_device *rdev);
|
2012-03-21 05:18:10 +08:00
|
|
|
extern int si_get_temp(struct radeon_device *rdev);
|
2011-12-17 06:03:42 +08:00
|
|
|
extern void evergreen_tiling_fields(unsigned tiling_flags, unsigned *bankw,
|
|
|
|
unsigned *bankh, unsigned *mtaspect,
|
|
|
|
unsigned *tile_split);
|
2009-09-08 08:10:24 +08:00
|
|
|
|
2009-06-05 20:42:42 +08:00
|
|
|
/*
|
|
|
|
* Fences.
|
|
|
|
*/
|
|
|
|
struct radeon_fence_driver {
|
|
|
|
uint32_t scratch_reg;
|
2011-11-21 04:45:34 +08:00
|
|
|
uint64_t gpu_addr;
|
|
|
|
volatile uint32_t *cpu_addr;
|
2012-05-10 21:57:31 +08:00
|
|
|
/* sync_seq is protected by ring emission lock */
|
|
|
|
uint64_t sync_seq[RADEON_NUM_RINGS];
|
2012-05-09 21:34:46 +08:00
|
|
|
atomic64_t last_seq;
|
2012-05-02 21:11:13 +08:00
|
|
|
unsigned long last_activity;
|
2009-12-12 03:36:19 +08:00
|
|
|
bool initialized;
|
2009-06-05 20:42:42 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
struct radeon_fence {
|
|
|
|
struct radeon_device *rdev;
|
|
|
|
struct kref kref;
|
|
|
|
/* protected by radeon_fence.lock */
|
2012-05-09 21:34:46 +08:00
|
|
|
uint64_t seq;
|
2011-08-26 01:39:48 +08:00
|
|
|
/* RB, DMA, etc. */
|
2012-05-09 21:34:46 +08:00
|
|
|
unsigned ring;
|
2009-06-05 20:42:42 +08:00
|
|
|
};
|
|
|
|
|
2011-11-21 04:45:34 +08:00
|
|
|
int radeon_fence_driver_start_ring(struct radeon_device *rdev, int ring);
|
|
|
|
int radeon_fence_driver_init(struct radeon_device *rdev);
|
2009-06-05 20:42:42 +08:00
|
|
|
void radeon_fence_driver_fini(struct radeon_device *rdev);
|
2012-12-17 23:29:06 +08:00
|
|
|
void radeon_fence_driver_force_completion(struct radeon_device *rdev);
|
2012-05-08 20:24:01 +08:00
|
|
|
int radeon_fence_emit(struct radeon_device *rdev, struct radeon_fence **fence, int ring);
|
2011-08-26 01:39:48 +08:00
|
|
|
void radeon_fence_process(struct radeon_device *rdev, int ring);
|
2009-06-05 20:42:42 +08:00
|
|
|
bool radeon_fence_signaled(struct radeon_fence *fence);
|
|
|
|
int radeon_fence_wait(struct radeon_fence *fence, bool interruptible);
|
2012-05-09 21:34:48 +08:00
|
|
|
int radeon_fence_wait_next_locked(struct radeon_device *rdev, int ring);
|
2012-12-18 00:04:32 +08:00
|
|
|
int radeon_fence_wait_empty_locked(struct radeon_device *rdev, int ring);
|
2012-05-09 21:34:55 +08:00
|
|
|
int radeon_fence_wait_any(struct radeon_device *rdev,
|
|
|
|
struct radeon_fence **fences,
|
|
|
|
bool intr);
|
2009-06-05 20:42:42 +08:00
|
|
|
struct radeon_fence *radeon_fence_ref(struct radeon_fence *fence);
|
|
|
|
void radeon_fence_unref(struct radeon_fence **fence);
|
2012-05-09 21:34:47 +08:00
|
|
|
unsigned radeon_fence_count_emitted(struct radeon_device *rdev, int ring);
|
2012-05-10 21:57:31 +08:00
|
|
|
bool radeon_fence_need_sync(struct radeon_fence *fence, int ring);
|
|
|
|
void radeon_fence_note_sync(struct radeon_fence *fence, int ring);
|
|
|
|
static inline struct radeon_fence *radeon_fence_later(struct radeon_fence *a,
|
|
|
|
struct radeon_fence *b)
|
|
|
|
{
|
|
|
|
if (!a) {
|
|
|
|
return b;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!b) {
|
|
|
|
return a;
|
|
|
|
}
|
|
|
|
|
|
|
|
BUG_ON(a->ring != b->ring);
|
|
|
|
|
|
|
|
if (a->seq > b->seq) {
|
|
|
|
return a;
|
|
|
|
} else {
|
|
|
|
return b;
|
|
|
|
}
|
|
|
|
}
|
2009-06-05 20:42:42 +08:00
|
|
|
|
2012-08-09 22:21:08 +08:00
|
|
|
static inline bool radeon_fence_is_earlier(struct radeon_fence *a,
|
|
|
|
struct radeon_fence *b)
|
|
|
|
{
|
|
|
|
if (!a) {
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!b) {
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
BUG_ON(a->ring != b->ring);
|
|
|
|
|
|
|
|
return a->seq < b->seq;
|
|
|
|
}
|
|
|
|
|
2009-06-24 07:48:08 +08:00
|
|
|
/*
|
|
|
|
* Tiling registers
|
|
|
|
*/
|
|
|
|
/*
 * One tiling surface register slot; it holds only a pointer to a buffer
 * object.
 */
struct radeon_surface_reg {
	/* NOTE(review): presumably NULL while the slot is unused -- confirm */
	struct radeon_bo *bo;
};
|
|
|
|
|
|
|
|
#define RADEON_GEM_MAX_SURFACES 8
|
2009-06-05 20:42:42 +08:00
|
|
|
|
|
|
|
/*
|
2009-11-20 21:29:23 +08:00
|
|
|
* TTM.
|
2009-06-05 20:42:42 +08:00
|
|
|
*/
|
2009-11-20 21:29:23 +08:00
|
|
|
struct radeon_mman {
|
|
|
|
struct ttm_bo_global_ref bo_global_ref;
|
2010-03-09 08:56:52 +08:00
|
|
|
struct drm_global_reference mem_global_ref;
|
2009-11-20 21:29:23 +08:00
|
|
|
struct ttm_bo_device bdev;
|
2009-12-12 03:36:19 +08:00
|
|
|
bool mem_global_referenced;
|
|
|
|
bool initialized;
|
2009-11-20 21:29:23 +08:00
|
|
|
};
|
|
|
|
|
drm/radeon: GPU virtual memory support v22
Virtual address space are per drm client (opener of /dev/drm).
Client are in charge of virtual address space, they need to
map bo into it by calling DRM_RADEON_GEM_VA ioctl.
First 16M of virtual address space is reserved by the kernel.
Once using 2 level page table we should be able to have a small
vram memory footprint for each pt (there would be one pt for all
gart, one for all vram and then one first level for each virtual
address space).
Plan include using the sub allocator for a common vm page table
area and using memcpy to copy vm page table in & out. Or use
a gart object and copy things in & out using dma.
v2: agd5f fixes:
- Add vram base offset for vram pages. The GPU physical address of a
vram page is FB_OFFSET + page offset. FB_OFFSET is 0 on discrete
cards and the physical bus address of the stolen memory on
integrated chips.
- VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR covers all vmid's >= 1
v3: agd5f:
- integrate with the semaphore/multi-ring stuff
v4:
- rebase on top ttm dma & multi-ring stuff
- userspace is now in charge of the address space
- no more specific cs vm ioctl, instead cs ioctl has a new
chunk
v5:
- properly handle mem == NULL case from move_notify callback
- fix the vm cleanup path
v6:
- fix update of page table to only happen on valid mem placement
v7:
- add tlb flush for each vm context
- add flags to define mapping property (readable, writeable, snooped)
- make ring id implicit from ib->fence->ring, up to each asic callback
to then do ring specific scheduling if vm ib scheduling function
v8:
- add query for ib limit and kernel reserved virtual space
- rename vm->size to max_pfn (maximum number of page)
- update gem_va ioctl to also allow unmap operation
- bump kernel version to allow userspace to query for vm support
v9:
- rebuild page table only when bind and incrementaly depending
on bo referenced by cs and that have been moved
- allow virtual address space to grow
- use sa allocator for vram page table
- return invalid when querying vm limit on non cayman GPU
- dump vm fault register on lockup
v10: agd5f:
- Move the vm schedule_ib callback to a standalone function, remove
the callback and use the existing ib_execute callback for VM IBs.
v11:
- rebase on top of lastest Linus
v12: agd5f:
- remove spurious backslash
- set IB vm_id to 0 in radeon_ib_get()
v13: agd5f:
- fix handling of RADEON_CHUNK_ID_FLAGS
v14:
- fix va destruction
- fix suspend resume
- forbid bo to have several different va in same vm
v15:
- rebase
v16:
- cleanup left over of vm init/fini
v17: agd5f:
- cs checker
v18: agd5f:
- reworks the CS ioctl to better support multiple rings and
VM. Rather than adding a new chunk id for VM, just re-use the
IB chunk id and add a new flags for VM mode. Also define additional
dwords for the flags chunk id to define the what ring we want to use
(gfx, compute, uvd, etc.) and the priority.
v19:
- fix cs fini in weird case of no ib
- semi working flush fix for ni
- rebase on top of sa allocator changes
v20: agd5f:
- further CS ioctl cleanups from Christian's comments
v21: agd5f:
- integrate CS checker improvements
v22: agd5f:
- final cleanups for release, only allow VM CS on cayman
Signed-off-by: Jerome Glisse <jglisse@redhat.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
2012-01-06 11:11:05 +08:00
|
|
|
/* bo virtual address in a specific vm */
|
|
|
|
struct radeon_bo_va {
|
2012-09-11 22:10:04 +08:00
|
|
|
/* protected by bo being reserved */
|
drm/radeon: GPU virtual memory support v22
Virtual address space are per drm client (opener of /dev/drm).
Client are in charge of virtual address space, they need to
map bo into it by calling DRM_RADEON_GEM_VA ioctl.
First 16M of virtual address space is reserved by the kernel.
Once using 2 level page table we should be able to have a small
vram memory footprint for each pt (there would be one pt for all
gart, one for all vram and then one first level for each virtual
address space).
Plan include using the sub allocator for a common vm page table
area and using memcpy to copy vm page table in & out. Or use
a gart object and copy things in & out using dma.
v2: agd5f fixes:
- Add vram base offset for vram pages. The GPU physical address of a
vram page is FB_OFFSET + page offset. FB_OFFSET is 0 on discrete
cards and the physical bus address of the stolen memory on
integrated chips.
- VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR covers all vmid's >= 1
v3: agd5f:
- integrate with the semaphore/multi-ring stuff
v4:
- rebase on top ttm dma & multi-ring stuff
- userspace is now in charge of the address space
- no more specific cs vm ioctl, instead cs ioctl has a new
chunk
v5:
- properly handle mem == NULL case from move_notify callback
- fix the vm cleanup path
v6:
- fix update of page table to only happen on valid mem placement
v7:
- add tlb flush for each vm context
- add flags to define mapping property (readable, writeable, snooped)
- make ring id implicit from ib->fence->ring, up to each asic callback
to then do ring specific scheduling if vm ib scheduling function
v8:
- add query for ib limit and kernel reserved virtual space
- rename vm->size to max_pfn (maximum number of page)
- update gem_va ioctl to also allow unmap operation
- bump kernel version to allow userspace to query for vm support
v9:
- rebuild page table only when bind and incrementaly depending
on bo referenced by cs and that have been moved
- allow virtual address space to grow
- use sa allocator for vram page table
- return invalid when querying vm limit on non cayman GPU
- dump vm fault register on lockup
v10: agd5f:
- Move the vm schedule_ib callback to a standalone function, remove
the callback and use the existing ib_execute callback for VM IBs.
v11:
- rebase on top of lastest Linus
v12: agd5f:
- remove spurious backslash
- set IB vm_id to 0 in radeon_ib_get()
v13: agd5f:
- fix handling of RADEON_CHUNK_ID_FLAGS
v14:
- fix va destruction
- fix suspend resume
- forbid bo to have several different va in same vm
v15:
- rebase
v16:
- cleanup left over of vm init/fini
v17: agd5f:
- cs checker
v18: agd5f:
- reworks the CS ioctl to better support multiple rings and
VM. Rather than adding a new chunk id for VM, just re-use the
IB chunk id and add a new flags for VM mode. Also define additional
dwords for the flags chunk id to define the what ring we want to use
(gfx, compute, uvd, etc.) and the priority.
v19:
- fix cs fini in weird case of no ib
- semi working flush fix for ni
- rebase on top of sa allocator changes
v20: agd5f:
- further CS ioctl cleanups from Christian's comments
v21: agd5f:
- integrate CS checker improvements
v22: agd5f:
- final cleanups for release, only allow VM CS on cayman
Signed-off-by: Jerome Glisse <jglisse@redhat.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
2012-01-06 11:11:05 +08:00
|
|
|
struct list_head bo_list;
|
|
|
|
uint64_t soffset;
|
|
|
|
uint64_t eoffset;
|
|
|
|
uint32_t flags;
|
|
|
|
bool valid;
|
2012-09-11 22:10:04 +08:00
|
|
|
unsigned ref_count;
|
|
|
|
|
|
|
|
/* protected by vm mutex */
|
|
|
|
struct list_head vm_list;
|
|
|
|
|
|
|
|
/* constant after initialization */
|
|
|
|
struct radeon_vm *vm;
|
|
|
|
struct radeon_bo *bo;
|
drm/radeon: GPU virtual memory support v22
Virtual address space are per drm client (opener of /dev/drm).
Client are in charge of virtual address space, they need to
map bo into it by calling DRM_RADEON_GEM_VA ioctl.
First 16M of virtual address space is reserved by the kernel.
Once using 2 level page table we should be able to have a small
vram memory footprint for each pt (there would be one pt for all
gart, one for all vram and then one first level for each virtual
address space).
Plan include using the sub allocator for a common vm page table
area and using memcpy to copy vm page table in & out. Or use
a gart object and copy things in & out using dma.
v2: agd5f fixes:
- Add vram base offset for vram pages. The GPU physical address of a
vram page is FB_OFFSET + page offset. FB_OFFSET is 0 on discrete
cards and the physical bus address of the stolen memory on
integrated chips.
- VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR covers all vmid's >= 1
v3: agd5f:
- integrate with the semaphore/multi-ring stuff
v4:
- rebase on top ttm dma & multi-ring stuff
- userspace is now in charge of the address space
- no more specific cs vm ioctl, instead cs ioctl has a new
chunk
v5:
- properly handle mem == NULL case from move_notify callback
- fix the vm cleanup path
v6:
- fix update of page table to only happen on valid mem placement
v7:
- add tlb flush for each vm context
- add flags to define mapping property (readable, writeable, snooped)
- make ring id implicit from ib->fence->ring, up to each asic callback
to then do ring specific scheduling if vm ib scheduling function
v8:
- add query for ib limit and kernel reserved virtual space
- rename vm->size to max_pfn (maximum number of page)
- update gem_va ioctl to also allow unmap operation
- bump kernel version to allow userspace to query for vm support
v9:
- rebuild page table only when bind and incrementaly depending
on bo referenced by cs and that have been moved
- allow virtual address space to grow
- use sa allocator for vram page table
- return invalid when querying vm limit on non cayman GPU
- dump vm fault register on lockup
v10: agd5f:
- Move the vm schedule_ib callback to a standalone function, remove
the callback and use the existing ib_execute callback for VM IBs.
v11:
- rebase on top of lastest Linus
v12: agd5f:
- remove spurious backslash
- set IB vm_id to 0 in radeon_ib_get()
v13: agd5f:
- fix handling of RADEON_CHUNK_ID_FLAGS
v14:
- fix va destruction
- fix suspend resume
- forbid bo to have several different va in same vm
v15:
- rebase
v16:
- cleanup left over of vm init/fini
v17: agd5f:
- cs checker
v18: agd5f:
- reworks the CS ioctl to better support multiple rings and
VM. Rather than adding a new chunk id for VM, just re-use the
IB chunk id and add a new flags for VM mode. Also define additional
dwords for the flags chunk id to define the what ring we want to use
(gfx, compute, uvd, etc.) and the priority.
v19:
- fix cs fini in weird case of no ib
- semi working flush fix for ni
- rebase on top of sa allocator changes
v20: agd5f:
- further CS ioctl cleanups from Christian's comments
v21: agd5f:
- integrate CS checker improvements
v22: agd5f:
- final cleanups for release, only allow VM CS on cayman
Signed-off-by: Jerome Glisse <jglisse@redhat.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
2012-01-06 11:11:05 +08:00
|
|
|
};
|
|
|
|
|
2009-11-20 21:29:23 +08:00
|
|
|
struct radeon_bo {
|
|
|
|
/* Protected by gem.mutex */
|
|
|
|
struct list_head list;
|
|
|
|
/* Protected by tbo.reserved */
|
2009-12-07 22:52:58 +08:00
|
|
|
u32 placements[3];
|
|
|
|
struct ttm_placement placement;
|
2009-11-20 21:29:23 +08:00
|
|
|
struct ttm_buffer_object tbo;
|
|
|
|
struct ttm_bo_kmap_obj kmap;
|
|
|
|
unsigned pin_count;
|
|
|
|
void *kptr;
|
|
|
|
u32 tiling_flags;
|
|
|
|
u32 pitch;
|
|
|
|
int surface_reg;
|
drm/radeon: GPU virtual memory support v22
Virtual address space are per drm client (opener of /dev/drm).
Client are in charge of virtual address space, they need to
map bo into it by calling DRM_RADEON_GEM_VA ioctl.
First 16M of virtual address space is reserved by the kernel.
Once using 2 level page table we should be able to have a small
vram memory footprint for each pt (there would be one pt for all
gart, one for all vram and then one first level for each virtual
address space).
Plan include using the sub allocator for a common vm page table
area and using memcpy to copy vm page table in & out. Or use
a gart object and copy things in & out using dma.
v2: agd5f fixes:
- Add vram base offset for vram pages. The GPU physical address of a
vram page is FB_OFFSET + page offset. FB_OFFSET is 0 on discrete
cards and the physical bus address of the stolen memory on
integrated chips.
- VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR covers all vmid's >= 1
v3: agd5f:
- integrate with the semaphore/multi-ring stuff
v4:
- rebase on top ttm dma & multi-ring stuff
- userspace is now in charge of the address space
- no more specific cs vm ioctl, instead cs ioctl has a new
chunk
v5:
- properly handle mem == NULL case from move_notify callback
- fix the vm cleanup path
v6:
- fix update of page table to only happen on valid mem placement
v7:
- add tlb flush for each vm context
- add flags to define mapping property (readable, writeable, snooped)
- make ring id implicit from ib->fence->ring, up to each asic callback
to then do ring specific scheduling if vm ib scheduling function
v8:
- add query for ib limit and kernel reserved virtual space
- rename vm->size to max_pfn (maximum number of page)
- update gem_va ioctl to also allow unmap operation
- bump kernel version to allow userspace to query for vm support
v9:
- rebuild page table only when bind and incrementally depending
on bo referenced by cs and that have been moved
- allow virtual address space to grow
- use sa allocator for vram page table
- return invalid when querying vm limit on non cayman GPU
- dump vm fault register on lockup
v10: agd5f:
- Move the vm schedule_ib callback to a standalone function, remove
the callback and use the existing ib_execute callback for VM IBs.
v11:
- rebase on top of latest Linus
v12: agd5f:
- remove spurious backslash
- set IB vm_id to 0 in radeon_ib_get()
v13: agd5f:
- fix handling of RADEON_CHUNK_ID_FLAGS
v14:
- fix va destruction
- fix suspend resume
- forbid bo to have several different va in same vm
v15:
- rebase
v16:
- cleanup left over of vm init/fini
v17: agd5f:
- cs checker
v18: agd5f:
- reworks the CS ioctl to better support multiple rings and
VM. Rather than adding a new chunk id for VM, just re-use the
IB chunk id and add a new flags for VM mode. Also define additional
dwords for the flags chunk id to define the what ring we want to use
(gfx, compute, uvd, etc.) and the priority.
v19:
- fix cs fini in weird case of no ib
- semi working flush fix for ni
- rebase on top of sa allocator changes
v20: agd5f:
- further CS ioctl cleanups from Christian's comments
v21: agd5f:
- integrate CS checker improvements
v22: agd5f:
- final cleanups for release, only allow VM CS on cayman
Signed-off-by: Jerome Glisse <jglisse@redhat.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
2012-01-06 11:11:05 +08:00
|
|
|
/* list of all virtual addresses to which this bo
|
|
|
|
* is associated
|
|
|
|
*/
|
|
|
|
struct list_head va;
|
2009-11-20 21:29:23 +08:00
|
|
|
/* Constant after initialization */
|
|
|
|
struct radeon_device *rdev;
|
2011-02-19 00:59:16 +08:00
|
|
|
struct drm_gem_object gem_base;
|
2012-05-31 20:52:53 +08:00
|
|
|
|
|
|
|
struct ttm_bo_kmap_obj dma_buf_vmap;
|
|
|
|
int vmapping_count;
|
2009-11-20 21:29:23 +08:00
|
|
|
};
|
2011-02-19 00:59:17 +08:00
|
|
|
/* Recover the enclosing struct radeon_bo from a pointer to its embedded
 * gem_base (struct drm_gem_object) member.
 */
#define gem_to_radeon_bo(gobj) container_of((gobj), struct radeon_bo, gem_base)
|
2009-06-05 20:42:42 +08:00
|
|
|
|
2009-11-20 21:29:23 +08:00
|
|
|
/* One entry pairing a radeon_bo with a TTM validation record.
 * NOTE(review): the per-field notes below are inferred from names only;
 * confirm against the users of this struct.
 */
struct radeon_bo_list {
|
2010-11-17 20:38:32 +08:00
|
|
|
/* TTM validation list entry for this buffer */
struct ttm_validate_buffer tv;
|
2009-11-20 21:29:23 +08:00
|
|
|
/* buffer object this entry refers to */
struct radeon_bo *bo;
|
2009-06-05 20:42:42 +08:00
|
|
|
/* presumably the GPU address of bo once placed -- TODO confirm */
uint64_t gpu_offset;
|
|
|
|
/* presumably the requested read domain(s) -- TODO confirm */
unsigned rdomain;
|
|
|
|
/* presumably the requested write domain(s) -- TODO confirm */
unsigned wdomain;
|
2009-11-20 21:29:23 +08:00
|
|
|
/* same name and type as radeon_bo.tiling_flags; presumably a copy -- TODO confirm */
u32 tiling_flags;
|
2009-06-05 20:42:42 +08:00
|
|
|
};
|
|
|
|
|
drm/radeon: introduce a sub allocator and convert ib pool to it v4
Somewhat specialized sub-allocator designed to perform sub-allocation
for command buffer not only for current cs ioctl but for future command
submission ioctl as well. Patch also converts current ib pool to use
the sub allocator. Idea is that ib pool buffer can be shared with other
command buffer submission not having 64K granularity.
v2 Harmonize pool handling and add suspend/resume callback to pin/unpin
sa bo (tested on rv280, rv370, r420, rv515, rv610, rv710, redwood, cayman,
rs480, rs690, rs880)
v3 Simplify allocator
v4 Fix radeon_ib_get error path to properly free fence
Signed-off-by: Jerome Glisse <jglisse@redhat.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
2011-11-16 00:48:34 +08:00
|
|
|
/* sub-allocation manager, it has to be protected by another lock.
|
|
|
|
* By design this is a helper for other parts of the driver
|
|
|
|
* like the indirect buffer or semaphore, which both have their own
|
|
|
|
* locking.
|
|
|
|
*
|
|
|
|
* The principle is simple: we keep a list of sub-allocations in offset
|
|
|
|
* order (first entry has offset == 0, last entry has the highest
|
|
|
|
* offset).
|
|
|
|
*
|
|
|
|
* When allocating new object we first check if there is room at
|
|
|
|
* the end total_size - (last_object_offset + last_object_size) >=
|
|
|
|
* alloc_size. If so we allocate new object there.
|
|
|
|
*
|
|
|
|
* When there is not enough room at the end, we start waiting for
|
|
|
|
* each sub object until we reach object_offset+object_size >=
|
|
|
|
* alloc_size, this object then becomes the sub object we return.
|
|
|
|
*
|
|
|
|
* Alignment can't be bigger than page size.
|
|
|
|
*
|
|
|
|
* Holes are not considered for allocation, to keep things simple.
|
|
|
|
* The assumption is that there won't be holes (all objects have the same
|
|
|
|
* alignment).
|
|
|
|
*/
|
|
|
|
struct radeon_sa_manager {
|
2012-07-12 03:07:57 +08:00
|
|
|
wait_queue_head_t wq;
|
drm/radeon: introduce a sub allocator and convert ib pool to it v4
Somewhat specialized sub-allocator designed to perform sub-allocation
for command buffer not only for current cs ioctl but for future command
submission ioctl as well. Patch also converts current ib pool to use
the sub allocator. Idea is that ib pool buffer can be shared with other
command buffer submission not having 64K granularity.
v2 Harmonize pool handling and add suspend/resume callback to pin/unpin
sa bo (tested on rv280, rv370, r420, rv515, rv610, rv710, redwood, cayman,
rs480, rs690, rs880)
v3 Simplify allocator
v4 Fix radeon_ib_get error path to properly free fence
Signed-off-by: Jerome Glisse <jglisse@redhat.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
2011-11-16 00:48:34 +08:00
|
|
|
struct radeon_bo *bo;
|
2012-05-09 21:34:56 +08:00
|
|
|
struct list_head *hole;
|
|
|
|
struct list_head flist[RADEON_NUM_RINGS];
|
|
|
|
struct list_head olist;
|
drm/radeon: introduce a sub allocator and convert ib pool to it v4
Somewhat specialized sub-allocator designed to perform sub-allocation
for command buffer not only for current cs ioctl but for future command
submission ioctl as well. Patch also converts current ib pool to use
the sub allocator. Idea is that ib pool buffer can be shared with other
command buffer submission not having 64K granularity.
v2 Harmonize pool handling and add suspend/resume callback to pin/unpin
sa bo (tested on rv280, rv370, r420, rv515, rv610, rv710, redwood, cayman,
rs480, rs690, rs880)
v3 Simplify allocator
v4 Fix radeon_ib_get error path to properly free fence
Signed-off-by: Jerome Glisse <jglisse@redhat.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
2011-11-16 00:48:34 +08:00
|
|
|
unsigned size;
|
|
|
|
uint64_t gpu_addr;
|
|
|
|
void *cpu_ptr;
|
|
|
|
uint32_t domain;
|
|
|
|
};
|
|
|
|
|
|
|
|
struct radeon_sa_bo;
|
|
|
|
|
|
|
|
/* sub-allocation buffer */
|
|
|
|
struct radeon_sa_bo {
|
2012-05-09 21:34:56 +08:00
|
|
|
struct list_head olist;
|
|
|
|
struct list_head flist;
|
drm/radeon: introduce a sub allocator and convert ib pool to it v4
Somewhat specialized sub-allocator designed to perform sub-allocation
for command buffer not only for current cs ioctl but for future command
submission ioctl as well. Patch also converts current ib pool to use
the sub allocator. Idea is that ib pool buffer can be shared with other
command buffer submission not having 64K granularity.
v2 Harmonize pool handling and add suspend/resume callback to pin/unpin
sa bo (tested on rv280, rv370, r420, rv515, rv610, rv710, redwood, cayman,
rs480, rs690, rs880)
v3 Simplify allocator
v4 Fix radeon_ib_get error path to properly free fence
Signed-off-by: Jerome Glisse <jglisse@redhat.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
2011-11-16 00:48:34 +08:00
|
|
|
struct radeon_sa_manager *manager;
|
2012-05-09 21:34:52 +08:00
|
|
|
unsigned soffset;
|
|
|
|
unsigned eoffset;
|
2012-05-09 21:34:54 +08:00
|
|
|
struct radeon_fence *fence;
|
drm/radeon: introduce a sub allocator and convert ib pool to it v4
Somewhat specialized sub-allocator designed to perform sub-allocation
for command buffer not only for current cs ioctl but for future command
submission ioctl as well. Patch also converts current ib pool to use
the sub allocator. Idea is that ib pool buffer can be shared with other
command buffer submission not having 64K granularity.
v2 Harmonize pool handling and add suspend/resume callback to pin/unpin
sa bo (tested on rv280, rv370, r420, rv515, rv610, rv710, redwood, cayman,
rs480, rs690, rs880)
v3 Simplify allocator
v4 Fix radeon_ib_get error path to properly free fence
Signed-off-by: Jerome Glisse <jglisse@redhat.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
2011-11-16 00:48:34 +08:00
|
|
|
};
|
|
|
|
|
2009-06-05 20:42:42 +08:00
|
|
|
/*
|
|
|
|
* GEM objects.
|
|
|
|
*/
|
|
|
|
/* GEM bookkeeping: a list head together with a mutex. */
struct radeon_gem {
|
2009-11-20 21:29:23 +08:00
|
|
|
/* radeon_bo.list is documented in this header as "Protected by gem.mutex" */
struct mutex mutex;
|
2009-06-05 20:42:42 +08:00
|
|
|
/* NOTE(review): presumably the head that radeon_bo.list entries are linked on -- confirm */
struct list_head objects;
|
|
|
|
};
|
|
|
|
|
|
|
|
int radeon_gem_init(struct radeon_device *rdev);
|
|
|
|
void radeon_gem_fini(struct radeon_device *rdev);
|
|
|
|
int radeon_gem_object_create(struct radeon_device *rdev, int size,
|
2009-11-20 21:29:23 +08:00
|
|
|
int alignment, int initial_domain,
|
|
|
|
bool discardable, bool kernel,
|
|
|
|
struct drm_gem_object **obj);
|
2009-06-05 20:42:42 +08:00
|
|
|
|
2011-02-07 10:16:14 +08:00
|
|
|
int radeon_mode_dumb_create(struct drm_file *file_priv,
|
|
|
|
struct drm_device *dev,
|
|
|
|
struct drm_mode_create_dumb *args);
|
|
|
|
int radeon_mode_dumb_mmap(struct drm_file *filp,
|
|
|
|
struct drm_device *dev,
|
|
|
|
uint32_t handle, uint64_t *offset_p);
|
|
|
|
int radeon_mode_dumb_destroy(struct drm_file *file_priv,
|
|
|
|
struct drm_device *dev,
|
|
|
|
uint32_t handle);
|
2009-06-05 20:42:42 +08:00
|
|
|
|
2011-12-22 01:13:47 +08:00
|
|
|
/*
|
|
|
|
* Semaphores.
|
|
|
|
*/
|
|
|
|
/* everything here is constant */
|
|
|
|
struct radeon_semaphore {
|
2012-05-09 21:34:57 +08:00
|
|
|
struct radeon_sa_bo *sa_bo;
|
|
|
|
signed waiters;
|
2011-12-22 01:13:47 +08:00
|
|
|
uint64_t gpu_addr;
|
|
|
|
};
|
|
|
|
|
|
|
|
int radeon_semaphore_create(struct radeon_device *rdev,
|
|
|
|
struct radeon_semaphore **semaphore);
|
|
|
|
void radeon_semaphore_emit_signal(struct radeon_device *rdev, int ring,
|
|
|
|
struct radeon_semaphore *semaphore);
|
|
|
|
void radeon_semaphore_emit_wait(struct radeon_device *rdev, int ring,
|
|
|
|
struct radeon_semaphore *semaphore);
|
2012-05-02 21:11:18 +08:00
|
|
|
int radeon_semaphore_sync_rings(struct radeon_device *rdev,
|
|
|
|
struct radeon_semaphore *semaphore,
|
2012-05-10 22:46:43 +08:00
|
|
|
int signaler, int waiter);
|
2011-12-22 01:13:47 +08:00
|
|
|
void radeon_semaphore_free(struct radeon_device *rdev,
|
2012-05-10 22:46:43 +08:00
|
|
|
struct radeon_semaphore **semaphore,
|
2012-05-09 21:34:57 +08:00
|
|
|
struct radeon_fence *fence);
|
2011-12-22 01:13:47 +08:00
|
|
|
|
2009-06-05 20:42:42 +08:00
|
|
|
/*
|
|
|
|
* GART structures, functions & helpers
|
|
|
|
*/
|
|
|
|
struct radeon_mc;
|
|
|
|
|
2009-10-14 12:34:41 +08:00
|
|
|
/* GPU page size (4KB); equals 1 << RADEON_GPU_PAGE_SHIFT. */
#define RADEON_GPU_PAGE_SIZE 4096
|
drm/radeon/kms: simplify memory controller setup V2
Get rid of _location and use _start/_end also simplify the
computation of vram_start|end & gtt_start|end. For R1XX-R2XX
we place VRAM at the same address of PCI aperture, those GPU
shouldn't have much memory and seems to behave better when
setup that way. For R3XX and newer we place VRAM at 0. For
R6XX-R7XX AGP we place VRAM before or after AGP aperture; this
might limit the VRAM size but it's very unlikely.
For IGP we don't change the VRAM placement.
Tested on (compiz,quake3,suspend/resume):
PCI/PCIE:RV280,R420,RV515,RV570,RV610,RV710
AGP:RV100,RV280,R420,RV350,RV620(RPB*),RV730
IGP:RS480(RPB*),RS690,RS780(RPB*),RS880
RPB: resume previously broken
V2 correct commit message to reflect more accurately the bug
and move VRAM placement to 0 for most of the GPU to avoid
limiting VRAM.
Signed-off-by: Jerome Glisse <jglisse@redhat.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
2010-02-18 05:54:29 +08:00
|
|
|
/* Mask selecting the offset bits within one GPU page (low 12 bits set). */
#define RADEON_GPU_PAGE_MASK (RADEON_GPU_PAGE_SIZE - 1)
|
2011-09-17 00:04:08 +08:00
|
|
|
/* Number of offset bits in a GPU page: 1 << 12 == 4096 == RADEON_GPU_PAGE_SIZE. */
#define RADEON_GPU_PAGE_SHIFT 12
|
drm/radeon: GPU virtual memory support v22
Virtual address space are per drm client (opener of /dev/drm).
Client are in charge of virtual address space, they need to
map bo into it by calling DRM_RADEON_GEM_VA ioctl.
First 16M of virtual address space is reserved by the kernel.
Once using 2 level page table we should be able to have a small
vram memory footprint for each pt (there would be one pt for all
gart, one for all vram and then one first level for each virtual
address space).
Plan include using the sub allocator for a common vm page table
area and using memcpy to copy vm page table in & out. Or use
a gart object and copy things in & out using dma.
v2: agd5f fixes:
- Add vram base offset for vram pages. The GPU physical address of a
vram page is FB_OFFSET + page offset. FB_OFFSET is 0 on discrete
cards and the physical bus address of the stolen memory on
integrated chips.
- VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR covers all vmid's >= 1
v3: agd5f:
- integrate with the semaphore/multi-ring stuff
v4:
- rebase on top ttm dma & multi-ring stuff
- userspace is now in charge of the address space
- no more specific cs vm ioctl, instead cs ioctl has a new
chunk
v5:
- properly handle mem == NULL case from move_notify callback
- fix the vm cleanup path
v6:
- fix update of page table to only happen on valid mem placement
v7:
- add tlb flush for each vm context
- add flags to define mapping property (readable, writeable, snooped)
- make ring id implicit from ib->fence->ring, up to each asic callback
to then do ring specific scheduling if vm ib scheduling function
v8:
- add query for ib limit and kernel reserved virtual space
- rename vm->size to max_pfn (maximum number of page)
- update gem_va ioctl to also allow unmap operation
- bump kernel version to allow userspace to query for vm support
v9:
- rebuild page table only when bind and incrementally depending
on bo referenced by cs and that have been moved
- allow virtual address space to grow
- use sa allocator for vram page table
- return invalid when querying vm limit on non cayman GPU
- dump vm fault register on lockup
v10: agd5f:
- Move the vm schedule_ib callback to a standalone function, remove
the callback and use the existing ib_execute callback for VM IBs.
v11:
- rebase on top of latest Linus
v12: agd5f:
- remove spurious backslash
- set IB vm_id to 0 in radeon_ib_get()
v13: agd5f:
- fix handling of RADEON_CHUNK_ID_FLAGS
v14:
- fix va destruction
- fix suspend resume
- forbid bo to have several different va in same vm
v15:
- rebase
v16:
- cleanup left over of vm init/fini
v17: agd5f:
- cs checker
v18: agd5f:
- reworks the CS ioctl to better support multiple rings and
VM. Rather than adding a new chunk id for VM, just re-use the
IB chunk id and add a new flags for VM mode. Also define additional
dwords for the flags chunk id to define the what ring we want to use
(gfx, compute, uvd, etc.) and the priority.
v19:
- fix cs fini in weird case of no ib
- semi working flush fix for ni
- rebase on top of sa allocator changes
v20: agd5f:
- further CS ioctl cleanups from Christian's comments
v21: agd5f:
- integrate CS checker improvements
v22: agd5f:
- final cleanups for release, only allow VM CS on cayman
Signed-off-by: Jerome Glisse <jglisse@redhat.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
2012-01-06 11:11:05 +08:00
|
|
|
/* Round (a) up to a multiple of RADEON_GPU_PAGE_SIZE; values that are already
 * aligned are returned unchanged. The argument is expanded exactly once.
 */
#define RADEON_GPU_PAGE_ALIGN(a) (((a) + RADEON_GPU_PAGE_MASK) & ~RADEON_GPU_PAGE_MASK)
|
2009-10-14 12:34:41 +08:00
|
|
|
|
2009-06-05 20:42:42 +08:00
|
|
|
/* GART state.
 * NOTE(review): field notes below are inferred from the names and from the
 * radeon_gart_* prototypes declared in this header; confirm against the
 * implementation before relying on them.
 */
struct radeon_gart {
|
|
|
|
/* presumably the bus/DMA address of the GART table -- TODO confirm */
dma_addr_t table_addr;
|
2011-11-03 23:16:49 +08:00
|
|
|
/* presumably the bo backing the table when it is allocated in VRAM -- TODO confirm */
struct radeon_bo *robj;
|
|
|
|
/* presumably the CPU mapping of the table -- TODO confirm */
void *ptr;
|
2009-06-05 20:42:42 +08:00
|
|
|
unsigned num_gpu_pages;
|
|
|
|
unsigned num_cpu_pages;
|
|
|
|
unsigned table_size;
|
|
|
|
/* pages / pages_addr have the same types as the pagelist / dma_addr
 * arguments of radeon_gart_bind()
 */
struct page **pages;
|
|
|
|
dma_addr_t *pages_addr;
|
|
|
|
bool ready;
|
|
|
|
};
|
|
|
|
|
|
|
|
int radeon_gart_table_ram_alloc(struct radeon_device *rdev);
|
|
|
|
void radeon_gart_table_ram_free(struct radeon_device *rdev);
|
|
|
|
int radeon_gart_table_vram_alloc(struct radeon_device *rdev);
|
|
|
|
void radeon_gart_table_vram_free(struct radeon_device *rdev);
|
2011-11-03 23:16:49 +08:00
|
|
|
int radeon_gart_table_vram_pin(struct radeon_device *rdev);
|
|
|
|
void radeon_gart_table_vram_unpin(struct radeon_device *rdev);
|
2009-06-05 20:42:42 +08:00
|
|
|
int radeon_gart_init(struct radeon_device *rdev);
|
|
|
|
void radeon_gart_fini(struct radeon_device *rdev);
|
|
|
|
void radeon_gart_unbind(struct radeon_device *rdev, unsigned offset,
|
|
|
|
int pages);
|
|
|
|
int radeon_gart_bind(struct radeon_device *rdev, unsigned offset,
|
2010-12-03 00:04:29 +08:00
|
|
|
int pages, struct page **pagelist,
|
|
|
|
dma_addr_t *dma_addr);
|
2011-11-03 23:16:49 +08:00
|
|
|
void radeon_gart_restore(struct radeon_device *rdev);
|
2009-06-05 20:42:42 +08:00
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
* GPU MC structures, functions & helpers
|
|
|
|
*/
|
|
|
|
struct radeon_mc {
|
|
|
|
resource_size_t aper_size;
|
|
|
|
resource_size_t aper_base;
|
|
|
|
resource_size_t agp_base;
|
2009-07-21 18:39:30 +08:00
|
|
|
/* for some chips with <= 32MB we need to lie
|
|
|
|
* about vram size near mc fb location */
|
2009-09-08 08:10:24 +08:00
|
|
|
u64 mc_vram_size;
|
drm/radeon/kms: simplify memory controller setup V2
Get rid of _location and use _start/_end also simplify the
computation of vram_start|end & gtt_start|end. For R1XX-R2XX
we place VRAM at the same address of PCI aperture, those GPU
shouldn't have much memory and seems to behave better when
setup that way. For R3XX and newer we place VRAM at 0. For
R6XX-R7XX AGP we place VRAM before or after AGP aperture; this
might limit the VRAM size but it's very unlikely.
For IGP we don't change the VRAM placement.
Tested on (compiz,quake3,suspend/resume):
PCI/PCIE:RV280,R420,RV515,RV570,RV610,RV710
AGP:RV100,RV280,R420,RV350,RV620(RPB*),RV730
IGP:RS480(RPB*),RS690,RS780(RPB*),RS880
RPB: resume previously broken
V2 correct commit message to reflect more accurately the bug
and move VRAM placement to 0 for most of the GPU to avoid
limiting VRAM.
Signed-off-by: Jerome Glisse <jglisse@redhat.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
2010-02-18 05:54:29 +08:00
|
|
|
u64 visible_vram_size;
|
2009-09-08 08:10:24 +08:00
|
|
|
u64 gtt_size;
|
|
|
|
u64 gtt_start;
|
|
|
|
u64 gtt_end;
|
|
|
|
u64 vram_start;
|
|
|
|
u64 vram_end;
|
2009-06-05 20:42:42 +08:00
|
|
|
unsigned vram_width;
|
2009-09-08 08:10:24 +08:00
|
|
|
u64 real_vram_size;
|
2009-06-05 20:42:42 +08:00
|
|
|
int vram_mtrr;
|
|
|
|
bool vram_is_ddr;
|
drm/radeon/kms: simplify memory controller setup V2
Get rid of _location and use _start/_end also simplify the
computation of vram_start|end & gtt_start|end. For R1XX-R2XX
we place VRAM at the same address of PCI aperture, those GPU
shouldn't have much memory and seems to behave better when
setup that way. For R3XX and newer we place VRAM at 0. For
R6XX-R7XX AGP we place VRAM before or after AGP aperture; this
might limit the VRAM size but it's very unlikely.
For IGP we don't change the VRAM placement.
Tested on (compiz,quake3,suspend/resume):
PCI/PCIE:RV280,R420,RV515,RV570,RV610,RV710
AGP:RV100,RV280,R420,RV350,RV620(RPB*),RV730
IGP:RS480(RPB*),RS690,RS780(RPB*),RS880
RPB: resume previously broken
V2 correct commit message to reflect more accurately the bug
and move VRAM placement to 0 for most of the GPU to avoid
limiting VRAM.
Signed-off-by: Jerome Glisse <jglisse@redhat.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
2010-02-18 05:54:29 +08:00
|
|
|
bool igp_sideport_enabled;
|
2010-07-15 22:51:10 +08:00
|
|
|
u64 gtt_base_align;
|
2009-06-05 20:42:42 +08:00
|
|
|
};
|
|
|
|
|
2010-01-06 00:27:29 +08:00
|
|
|
bool radeon_combios_sideport_present(struct radeon_device *rdev);
|
|
|
|
bool radeon_atombios_sideport_present(struct radeon_device *rdev);
|
2009-06-05 20:42:42 +08:00
|
|
|
|
|
|
|
/*
|
|
|
|
* GPU scratch registers structures, functions & helpers
|
|
|
|
*/
|
|
|
|
/* Bookkeeping for up to 32 scratch registers.
 * NOTE(review): presumably managed through radeon_scratch_get() and
 * radeon_scratch_free() declared below -- confirm.
 */
struct radeon_scratch {
|
|
|
|
/* presumably how many of the 32 slots below are in use on this asic -- TODO confirm */
unsigned num_reg;
|
2010-08-28 06:25:25 +08:00
|
|
|
uint32_t reg_base;
|
2009-06-05 20:42:42 +08:00
|
|
|
/* free[] and reg[] have the same length; presumably free[i] tracks
 * whether reg[i] is available -- TODO confirm
 */
bool free[32];
|
|
|
|
uint32_t reg[32];
|
|
|
|
};
|
|
|
|
|
|
|
|
int radeon_scratch_get(struct radeon_device *rdev, uint32_t *reg);
|
|
|
|
void radeon_scratch_free(struct radeon_device *rdev, uint32_t reg);
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
* IRQS.
|
|
|
|
*/
|
2010-11-21 23:59:01 +08:00
|
|
|
|
|
|
|
struct radeon_unpin_work {
|
|
|
|
struct work_struct work;
|
|
|
|
struct radeon_device *rdev;
|
|
|
|
int crtc_id;
|
|
|
|
struct radeon_fence *fence;
|
|
|
|
struct drm_pending_vblank_event *event;
|
|
|
|
struct radeon_bo *old_rbo;
|
|
|
|
u64 new_crtc_base;
|
|
|
|
};
|
|
|
|
|
|
|
|
struct r500_irq_stat_regs {
|
|
|
|
u32 disp_int;
|
2012-03-30 20:59:57 +08:00
|
|
|
u32 hdmi0_status;
|
2010-11-21 23:59:01 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
struct r600_irq_stat_regs {
|
|
|
|
u32 disp_int;
|
|
|
|
u32 disp_int_cont;
|
|
|
|
u32 disp_int_cont2;
|
|
|
|
u32 d1grph_int;
|
|
|
|
u32 d2grph_int;
|
2012-03-30 20:59:57 +08:00
|
|
|
u32 hdmi0_status;
|
|
|
|
u32 hdmi1_status;
|
2010-11-21 23:59:01 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
struct evergreen_irq_stat_regs {
|
|
|
|
u32 disp_int;
|
|
|
|
u32 disp_int_cont;
|
|
|
|
u32 disp_int_cont2;
|
|
|
|
u32 disp_int_cont3;
|
|
|
|
u32 disp_int_cont4;
|
|
|
|
u32 disp_int_cont5;
|
|
|
|
u32 d1grph_int;
|
|
|
|
u32 d2grph_int;
|
|
|
|
u32 d3grph_int;
|
|
|
|
u32 d4grph_int;
|
|
|
|
u32 d5grph_int;
|
|
|
|
u32 d6grph_int;
|
2012-03-30 20:59:57 +08:00
|
|
|
u32 afmt_status1;
|
|
|
|
u32 afmt_status2;
|
|
|
|
u32 afmt_status3;
|
|
|
|
u32 afmt_status4;
|
|
|
|
u32 afmt_status5;
|
|
|
|
u32 afmt_status6;
|
2010-11-21 23:59:01 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
/* Per-family IRQ status register sets. The members share storage, so only
 * the layout matching the running hardware family is meaningful at a time.
 * Embedded in struct radeon_irq as stat_regs.
 */
union radeon_irq_stat_regs {
|
|
|
|
struct r500_irq_stat_regs r500;
|
|
|
|
struct r600_irq_stat_regs r600;
|
|
|
|
struct evergreen_irq_stat_regs evergreen;
|
|
|
|
};
|
|
|
|
|
2011-10-27 03:43:58 +08:00
|
|
|
/* Length of the hpd[] array in struct radeon_irq. */
#define RADEON_MAX_HPD_PINS 6
|
|
|
|
/* Length of the crtc_vblank_int[] and pflip[] arrays in struct radeon_irq. */
#define RADEON_MAX_CRTCS 6
|
2012-03-30 20:59:57 +08:00
|
|
|
/* Length of the afmt[] array in struct radeon_irq. */
#define RADEON_MAX_AFMT_BLOCKS 6
|
2011-10-27 03:43:58 +08:00
|
|
|
|
2009-06-05 20:42:42 +08:00
|
|
|
struct radeon_irq {
|
2012-05-17 07:33:30 +08:00
|
|
|
bool installed;
|
|
|
|
spinlock_t lock;
|
2012-05-18 01:52:00 +08:00
|
|
|
atomic_t ring_int[RADEON_NUM_RINGS];
|
2012-05-17 07:33:30 +08:00
|
|
|
bool crtc_vblank_int[RADEON_MAX_CRTCS];
|
2012-05-18 01:52:00 +08:00
|
|
|
atomic_t pflip[RADEON_MAX_CRTCS];
|
2012-05-17 07:33:30 +08:00
|
|
|
wait_queue_head_t vblank_queue;
|
|
|
|
bool hpd[RADEON_MAX_HPD_PINS];
|
|
|
|
bool afmt[RADEON_MAX_AFMT_BLOCKS];
|
|
|
|
union radeon_irq_stat_regs stat_regs;
|
2009-06-05 20:42:42 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
int radeon_irq_kms_init(struct radeon_device *rdev);
|
|
|
|
void radeon_irq_kms_fini(struct radeon_device *rdev);
|
2011-11-18 09:13:28 +08:00
|
|
|
void radeon_irq_kms_sw_irq_get(struct radeon_device *rdev, int ring);
|
|
|
|
void radeon_irq_kms_sw_irq_put(struct radeon_device *rdev, int ring);
|
2010-11-21 23:59:01 +08:00
|
|
|
void radeon_irq_kms_pflip_irq_get(struct radeon_device *rdev, int crtc);
|
|
|
|
void radeon_irq_kms_pflip_irq_put(struct radeon_device *rdev, int crtc);
|
2012-05-17 07:33:30 +08:00
|
|
|
void radeon_irq_kms_enable_afmt(struct radeon_device *rdev, int block);
|
|
|
|
void radeon_irq_kms_disable_afmt(struct radeon_device *rdev, int block);
|
|
|
|
void radeon_irq_kms_enable_hpd(struct radeon_device *rdev, unsigned hpd_mask);
|
|
|
|
void radeon_irq_kms_disable_hpd(struct radeon_device *rdev, unsigned hpd_mask);
|
2009-06-05 20:42:42 +08:00
|
|
|
|
|
|
|
/*
|
2011-10-23 18:56:27 +08:00
|
|
|
* CP & rings.
|
2009-06-05 20:42:42 +08:00
|
|
|
*/
|
2011-08-26 01:39:48 +08:00
|
|
|
|
2009-06-05 20:42:42 +08:00
|
|
|
struct radeon_ib {
|
2012-05-09 21:35:00 +08:00
|
|
|
struct radeon_sa_bo *sa_bo;
|
|
|
|
uint32_t length_dw;
|
|
|
|
uint64_t gpu_addr;
|
|
|
|
uint32_t *ptr;
|
2012-05-08 20:24:01 +08:00
|
|
|
int ring;
|
2012-05-09 21:35:00 +08:00
|
|
|
struct radeon_fence *fence;
|
2012-08-07 00:57:44 +08:00
|
|
|
struct radeon_vm *vm;
|
2012-05-09 21:35:00 +08:00
|
|
|
bool is_const_ib;
|
2012-05-10 22:46:43 +08:00
|
|
|
struct radeon_fence *sync_to[RADEON_NUM_RINGS];
|
2012-05-09 21:35:00 +08:00
|
|
|
struct radeon_semaphore *semaphore;
|
2009-06-05 20:42:42 +08:00
|
|
|
};
|
|
|
|
|
2011-10-23 18:56:27 +08:00
|
|
|
struct radeon_ring {
|
2009-11-20 21:29:23 +08:00
|
|
|
struct radeon_bo *ring_obj;
|
2009-06-05 20:42:42 +08:00
|
|
|
volatile uint32_t *ring;
|
|
|
|
unsigned rptr;
|
2011-10-13 18:48:45 +08:00
|
|
|
unsigned rptr_offs;
|
|
|
|
unsigned rptr_reg;
|
2012-07-06 22:22:55 +08:00
|
|
|
unsigned rptr_save_reg;
|
2012-07-18 02:02:31 +08:00
|
|
|
u64 next_rptr_gpu_addr;
|
|
|
|
volatile u32 *next_rptr_cpu_addr;
|
2009-06-05 20:42:42 +08:00
|
|
|
unsigned wptr;
|
|
|
|
unsigned wptr_old;
|
2011-10-13 18:48:45 +08:00
|
|
|
unsigned wptr_reg;
|
2009-06-05 20:42:42 +08:00
|
|
|
unsigned ring_size;
|
|
|
|
unsigned ring_free_dw;
|
|
|
|
int count_dw;
|
2012-05-02 21:11:20 +08:00
|
|
|
unsigned long last_activity;
|
|
|
|
unsigned last_rptr;
|
2009-06-05 20:42:42 +08:00
|
|
|
uint64_t gpu_addr;
|
|
|
|
uint32_t align_mask;
|
|
|
|
uint32_t ptr_mask;
|
|
|
|
bool ready;
|
2011-11-18 03:25:56 +08:00
|
|
|
u32 ptr_reg_shift;
|
|
|
|
u32 ptr_reg_mask;
|
|
|
|
u32 nop;
|
2012-07-18 02:02:30 +08:00
|
|
|
u32 idx;
|
2013-01-12 04:19:43 +08:00
|
|
|
u64 last_semaphore_signal_addr;
|
|
|
|
u64 last_semaphore_wait_addr;
|
2009-06-05 20:42:42 +08:00
|
|
|
};
|
|
|
|
|
drm/radeon: GPU virtual memory support v22
Virtual address space are per drm client (opener of /dev/drm).
Client are in charge of virtual address space, they need to
map bo into it by calling DRM_RADEON_GEM_VA ioctl.
First 16M of virtual address space is reserved by the kernel.
Once using 2 level page table we should be able to have a small
vram memory footprint for each pt (there would be one pt for all
gart, one for all vram and then one first level for each virtual
address space).
Plan include using the sub allocator for a common vm page table
area and using memcpy to copy vm page table in & out. Or use
a gart object and copy things in & out using dma.
v2: agd5f fixes:
- Add vram base offset for vram pages. The GPU physical address of a
vram page is FB_OFFSET + page offset. FB_OFFSET is 0 on discrete
cards and the physical bus address of the stolen memory on
integrated chips.
- VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR covers all vmid's >= 1
v3: agd5f:
- integrate with the semaphore/multi-ring stuff
v4:
- rebase on top ttm dma & multi-ring stuff
- userspace is now in charge of the address space
- no more specific cs vm ioctl, instead cs ioctl has a new
chunk
v5:
- properly handle mem == NULL case from move_notify callback
- fix the vm cleanup path
v6:
- fix update of page table to only happen on valid mem placement
v7:
- add tlb flush for each vm context
- add flags to define mapping property (readable, writeable, snooped)
- make ring id implicit from ib->fence->ring, up to each asic callback
to then do ring specific scheduling if vm ib scheduling function
v8:
- add query for ib limit and kernel reserved virtual space
- rename vm->size to max_pfn (maximum number of page)
- update gem_va ioctl to also allow unmap operation
- bump kernel version to allow userspace to query for vm support
v9:
- rebuild page table only when bind and incrementally depending
on bo referenced by cs and that have been moved
- allow virtual address space to grow
- use sa allocator for vram page table
- return invalid when querying vm limit on non cayman GPU
- dump vm fault register on lockup
v10: agd5f:
- Move the vm schedule_ib callback to a standalone function, remove
the callback and use the existing ib_execute callback for VM IBs.
v11:
- rebase on top of latest Linus
v12: agd5f:
- remove spurious backslash
- set IB vm_id to 0 in radeon_ib_get()
v13: agd5f:
- fix handling of RADEON_CHUNK_ID_FLAGS
v14:
- fix va destruction
- fix suspend resume
- forbid bo to have several different va in same vm
v15:
- rebase
v16:
- cleanup left over of vm init/fini
v17: agd5f:
- cs checker
v18: agd5f:
- reworks the CS ioctl to better support multiple rings and
VM. Rather than adding a new chunk id for VM, just re-use the
IB chunk id and add a new flags for VM mode. Also define additional
dwords for the flags chunk id to define the what ring we want to use
(gfx, compute, uvd, etc.) and the priority.
v19:
- fix cs fini in weird case of no ib
- semi working flush fix for ni
- rebase on top of sa allocator changes
v20: agd5f:
- further CS ioctl cleanups from Christian's comments
v21: agd5f:
- integrate CS checker improvements
v22: agd5f:
- final cleanups for release, only allow VM CS on cayman
Signed-off-by: Jerome Glisse <jglisse@redhat.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
2012-01-06 11:11:05 +08:00
|
|
|
/*
|
|
|
|
* VM
|
|
|
|
*/
|
2012-08-09 22:21:08 +08:00
|
|
|
|
drm/radeon: add 2-level VM pagetables support v9
PDE/PTE update code uses CP ring for memory writes.
All page table entries are preallocated for now in alloc_pt().
It is made as whole because it's hard to divide it to several patches
that compile and doesn't break anything being applied separately.
Tested on cayman card.
v2: rebased on top of "refactor set_page chipset interface v3",
code cleanups
v3: switched offsets calc macros to inline funcs where possible,
remove pd_addr from radeon_vm, switched RADEON_BLOCK_SIZE define,
to 9 (and PTE_COUNT to 1 << BLOCK_SIZE)
v4 (ck): move "incr" documentation to previous patch, cleanup and
document RADEON_VM_* constants, change commit message to
our usual format, simplify patch a lot by removing
everything currently not necessary, disable SI workaround.
v5: (agd5f): Fix typo in tables_size calculation in
radeon_vm_alloc_pt(). Second line should have been
'+=' rather than '='.
v6: fix npdes calculation. In scenario when pfns to be mapped overlap
two PDE spans:
+-----------+-------------+
| PDE span | PDE span |
+-----------+----+--------+
| |
+---------+
| pfns |
+---------+
the following npdes calculation gives incorrect result:
npdes = (nptes >> RADEON_VM_BLOCK_SIZE) + 1;
For the case above picture it should give npdes = 2, but gives one.
This patch corrects it by rounding last pfn up to 512 border,
first - down to 512 border and then subtracting and dividing by 512.
v7: Make npde calculation clearer, fix ndw calculation.
v8: (agd5f): reserve enough for 2 full VM PTs, add some
additional comments.
v9: fix typo in npde calculation
Signed-off-by: Dmitry Cherkasov <Dmitrii.Cherkasov@amd.com>
Signed-off-by: Christian König <deathsimple@vodafone.de>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
2012-09-18 01:36:19 +08:00
|
|
|
/* maximum number of VMIDs; also the length of radeon_vm_manager.active[] */
|
2012-08-09 22:21:08 +08:00
|
|
|
#define RADEON_NUM_VM 16
|
|
|
|
|
drm/radeon: add 2-level VM pagetables support v9
PDE/PTE update code uses CP ring for memory writes.
All page table entries are preallocated for now in alloc_pt().
It is made as whole because it's hard to divide it to several patches
that compile and doesn't break anything being applied separately.
Tested on cayman card.
v2: rebased on top of "refactor set_page chipset interface v3",
code cleanups
v3: switched offsets calc macros to inline funcs where possible,
remove pd_addr from radeon_vm, switched RADEON_BLOCK_SIZE define,
to 9 (and PTE_COUNT to 1 << BLOCK_SIZE)
v4 (ck): move "incr" documentation to previous patch, cleanup and
document RADEON_VM_* constants, change commit message to
our usual format, simplify patch a lot by removing
everything currently not necessary, disable SI workaround.
v5: (agd5f): Fix typo in tables_size calculation in
radeon_vm_alloc_pt(). Second line should have been
'+=' rather than '='.
v6: fix npdes calculation. In scenario when pfns to be mapped overlap
two PDE spans:
+-----------+-------------+
| PDE span | PDE span |
+-----------+----+--------+
| |
+---------+
| pfns |
+---------+
the following npdes calculation gives incorrect result:
npdes = (nptes >> RADEON_VM_BLOCK_SIZE) + 1;
For the case above picture it should give npdes = 2, but gives one.
This patch corrects it by rounding last pfn up to 512 border,
first - down to 512 border and then subtracting and dividing by 512.
v7: Make npde calculation clearer, fix ndw calculation.
v8: (agd5f): reserve enough for 2 full VM PTs, add some
additional comments.
v9: fix typo in npde calculation
Signed-off-by: Dmitry Cherkasov <Dmitrii.Cherkasov@amd.com>
Signed-off-by: Christian König <deathsimple@vodafone.de>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
2012-09-18 01:36:19 +08:00
|
|
|
/* defines number of bits in page table versus page directory,
|
|
|
|
* a page is 4KB so we have 12 bits offset, 9 bits in the page
|
|
|
|
* table and the remaining 19 bits are in the page directory */
|
|
|
|
#define RADEON_VM_BLOCK_SIZE 9
|
|
|
|
|
|
|
|
/* number of entries in page table: 1 << RADEON_VM_BLOCK_SIZE, i.e. 512
 * with the block size of 9 defined above */
|
|
|
|
#define RADEON_VM_PTE_COUNT (1 << RADEON_VM_BLOCK_SIZE)
|
|
|
|
|
drm/radeon: GPU virtual memory support v22
Virtual address space are per drm client (opener of /dev/drm).
Client are in charge of virtual address space, they need to
map bo into it by calling DRM_RADEON_GEM_VA ioctl.
First 16M of virtual address space is reserved by the kernel.
Once using 2 level page table we should be able to have a small
vram memory footprint for each pt (there would be one pt for all
gart, one for all vram and then one first level for each virtual
address space).
Plan include using the sub allocator for a common vm page table
area and using memcpy to copy vm page table in & out. Or use
a gart object and copy things in & out using dma.
v2: agd5f fixes:
- Add vram base offset for vram pages. The GPU physical address of a
vram page is FB_OFFSET + page offset. FB_OFFSET is 0 on discrete
cards and the physical bus address of the stolen memory on
integrated chips.
- VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR covers all vmid's >= 1
v3: agd5f:
- integrate with the semaphore/multi-ring stuff
v4:
- rebase on top ttm dma & multi-ring stuff
- userspace is now in charge of the address space
- no more specific cs vm ioctl, instead cs ioctl has a new
chunk
v5:
- properly handle mem == NULL case from move_notify callback
- fix the vm cleanup path
v6:
- fix update of page table to only happen on valid mem placement
v7:
- add tlb flush for each vm context
- add flags to define mapping property (readable, writeable, snooped)
- make ring id implicit from ib->fence->ring, up to each asic callback
to then do ring specific scheduling if vm ib scheduling function
v8:
- add query for ib limit and kernel reserved virtual space
- rename vm->size to max_pfn (maximum number of page)
- update gem_va ioctl to also allow unmap operation
- bump kernel version to allow userspace to query for vm support
v9:
- rebuild page table only when bind and incrementally depending
on bo referenced by cs and that have been moved
- allow virtual address space to grow
- use sa allocator for vram page table
- return invalid when querying vm limit on non cayman GPU
- dump vm fault register on lockup
v10: agd5f:
- Move the vm schedule_ib callback to a standalone function, remove
the callback and use the existing ib_execute callback for VM IBs.
v11:
- rebase on top of latest Linus
v12: agd5f:
- remove spurious backslash
- set IB vm_id to 0 in radeon_ib_get()
v13: agd5f:
- fix handling of RADEON_CHUNK_ID_FLAGS
v14:
- fix va destruction
- fix suspend resume
- forbid bo to have several different va in same vm
v15:
- rebase
v16:
- cleanup left over of vm init/fini
v17: agd5f:
- cs checker
v18: agd5f:
- reworks the CS ioctl to better support multiple rings and
VM. Rather than adding a new chunk id for VM, just re-use the
IB chunk id and add a new flags for VM mode. Also define additional
dwords for the flags chunk id to define the what ring we want to use
(gfx, compute, uvd, etc.) and the priority.
v19:
- fix cs fini in weird case of no ib
- semi working flush fix for ni
- rebase on top of sa allocator changes
v20: agd5f:
- further CS ioctl cleanups from Christian's comments
v21: agd5f:
- integrate CS checker improvements
v22: agd5f:
- final cleanups for release, only allow VM CS on cayman
Signed-off-by: Jerome Glisse <jglisse@redhat.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
2012-01-06 11:11:05 +08:00
|
|
|
struct radeon_vm {
|
|
|
|
struct list_head list;
|
|
|
|
struct list_head va;
|
2012-08-09 22:21:08 +08:00
|
|
|
unsigned id;
|
2012-10-09 19:31:17 +08:00
|
|
|
|
|
|
|
/* contains the page directory */
|
|
|
|
struct radeon_sa_bo *page_directory;
|
|
|
|
uint64_t pd_gpu_addr;
|
|
|
|
|
|
|
|
/* array of page tables, one for each page directory entry */
|
|
|
|
struct radeon_sa_bo **page_tables;
|
|
|
|
|
drm/radeon: GPU virtual memory support v22
Virtual address space are per drm client (opener of /dev/drm).
Client are in charge of virtual address space, they need to
map bo into it by calling DRM_RADEON_GEM_VA ioctl.
First 16M of virtual address space is reserved by the kernel.
Once using 2 level page table we should be able to have a small
vram memory footprint for each pt (there would be one pt for all
gart, one for all vram and then one first level for each virtual
address space).
Plan include using the sub allocator for a common vm page table
area and using memcpy to copy vm page table in & out. Or use
a gart object and copy things in & out using dma.
v2: agd5f fixes:
- Add vram base offset for vram pages. The GPU physical address of a
vram page is FB_OFFSET + page offset. FB_OFFSET is 0 on discrete
cards and the physical bus address of the stolen memory on
integrated chips.
- VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR covers all vmid's >= 1
v3: agd5f:
- integrate with the semaphore/multi-ring stuff
v4:
- rebase on top ttm dma & multi-ring stuff
- userspace is now in charge of the address space
- no more specific cs vm ioctl, instead cs ioctl has a new
chunk
v5:
- properly handle mem == NULL case from move_notify callback
- fix the vm cleanup path
v6:
- fix update of page table to only happen on valid mem placement
v7:
- add tlb flush for each vm context
- add flags to define mapping property (readable, writeable, snooped)
- make ring id implicit from ib->fence->ring, up to each asic callback
to then do ring specific scheduling if vm ib scheduling function
v8:
- add query for ib limit and kernel reserved virtual space
- rename vm->size to max_pfn (maximum number of page)
- update gem_va ioctl to also allow unmap operation
- bump kernel version to allow userspace to query for vm support
v9:
- rebuild page table only when bind and incrementally depending
on bo referenced by cs and that have been moved
- allow virtual address space to grow
- use sa allocator for vram page table
- return invalid when querying vm limit on non cayman GPU
- dump vm fault register on lockup
v10: agd5f:
- Move the vm schedule_ib callback to a standalone function, remove
the callback and use the existing ib_execute callback for VM IBs.
v11:
- rebase on top of latest Linus
v12: agd5f:
- remove spurious backslash
- set IB vm_id to 0 in radeon_ib_get()
v13: agd5f:
- fix handling of RADEON_CHUNK_ID_FLAGS
v14:
- fix va destruction
- fix suspend resume
- forbid bo to have several different va in same vm
v15:
- rebase
v16:
- cleanup left over of vm init/fini
v17: agd5f:
- cs checker
v18: agd5f:
- reworks the CS ioctl to better support multiple rings and
VM. Rather than adding a new chunk id for VM, just re-use the
IB chunk id and add a new flags for VM mode. Also define additional
dwords for the flags chunk id to define the what ring we want to use
(gfx, compute, uvd, etc.) and the priority.
v19:
- fix cs fini in weird case of no ib
- semi working flush fix for ni
- rebase on top of sa allocator changes
v20: agd5f:
- further CS ioctl cleanups from Christian's comments
v21: agd5f:
- integrate CS checker improvements
v22: agd5f:
- final cleanups for release, only allow VM CS on cayman
Signed-off-by: Jerome Glisse <jglisse@redhat.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
2012-01-06 11:11:05 +08:00
|
|
|
struct mutex mutex;
|
|
|
|
/* last fence for cs using this vm */
|
|
|
|
struct radeon_fence *fence;
|
2012-08-08 18:22:43 +08:00
|
|
|
/* last flush or NULL if we still need to flush */
|
|
|
|
struct radeon_fence *last_flush;
|
drm/radeon: GPU virtual memory support v22
Virtual address space are per drm client (opener of /dev/drm).
Client are in charge of virtual address space, they need to
map bo into it by calling DRM_RADEON_GEM_VA ioctl.
First 16M of virtual address space is reserved by the kernel.
Once using 2 level page table we should be able to have a small
vram memory footprint for each pt (there would be one pt for all
gart, one for all vram and then one first level for each virtual
address space).
Plan include using the sub allocator for a common vm page table
area and using memcpy to copy vm page table in & out. Or use
a gart object and copy things in & out using dma.
v2: agd5f fixes:
- Add vram base offset for vram pages. The GPU physical address of a
vram page is FB_OFFSET + page offset. FB_OFFSET is 0 on discrete
cards and the physical bus address of the stolen memory on
integrated chips.
- VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR covers all vmid's >= 1
v3: agd5f:
- integrate with the semaphore/multi-ring stuff
v4:
- rebase on top ttm dma & multi-ring stuff
- userspace is now in charge of the address space
- no more specific cs vm ioctl, instead cs ioctl has a new
chunk
v5:
- properly handle mem == NULL case from move_notify callback
- fix the vm cleanup path
v6:
- fix update of page table to only happen on valid mem placement
v7:
- add tlb flush for each vm context
- add flags to define mapping property (readable, writeable, snooped)
- make ring id implicit from ib->fence->ring, up to each asic callback
to then do ring specific scheduling if vm ib scheduling function
v8:
- add query for ib limit and kernel reserved virtual space
- rename vm->size to max_pfn (maximum number of page)
- update gem_va ioctl to also allow unmap operation
- bump kernel version to allow userspace to query for vm support
v9:
- rebuild page table only when bind and incrementally depending
on bo referenced by cs and that have been moved
- allow virtual address space to grow
- use sa allocator for vram page table
- return invalid when querying vm limit on non cayman GPU
- dump vm fault register on lockup
v10: agd5f:
- Move the vm schedule_ib callback to a standalone function, remove
the callback and use the existing ib_execute callback for VM IBs.
v11:
- rebase on top of latest Linus
v12: agd5f:
- remove spurious backslash
- set IB vm_id to 0 in radeon_ib_get()
v13: agd5f:
- fix handling of RADEON_CHUNK_ID_FLAGS
v14:
- fix va destruction
- fix suspend resume
- forbid bo to have several different va in same vm
v15:
- rebase
v16:
- cleanup left over of vm init/fini
v17: agd5f:
- cs checker
v18: agd5f:
- reworks the CS ioctl to better support multiple rings and
VM. Rather than adding a new chunk id for VM, just re-use the
IB chunk id and add a new flags for VM mode. Also define additional
dwords for the flags chunk id to define the what ring we want to use
(gfx, compute, uvd, etc.) and the priority.
v19:
- fix cs fini in weird case of no ib
- semi working flush fix for ni
- rebase on top of sa allocator changes
v20: agd5f:
- further CS ioctl cleanups from Christian's comments
v21: agd5f:
- integrate CS checker improvements
v22: agd5f:
- final cleanups for release, only allow VM CS on cayman
Signed-off-by: Jerome Glisse <jglisse@redhat.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
2012-01-06 11:11:05 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
struct radeon_vm_manager {
|
2012-05-09 16:07:08 +08:00
|
|
|
struct mutex lock;
|
drm/radeon: GPU virtual memory support v22
Virtual address space are per drm client (opener of /dev/drm).
Client are in charge of virtual address space, they need to
map bo into it by calling DRM_RADEON_GEM_VA ioctl.
First 16M of virtual address space is reserved by the kernel.
Once using 2 level page table we should be able to have a small
vram memory footprint for each pt (there would be one pt for all
gart, one for all vram and then one first level for each virtual
address space).
Plan include using the sub allocator for a common vm page table
area and using memcpy to copy vm page table in & out. Or use
a gart object and copy things in & out using dma.
v2: agd5f fixes:
- Add vram base offset for vram pages. The GPU physical address of a
vram page is FB_OFFSET + page offset. FB_OFFSET is 0 on discrete
cards and the physical bus address of the stolen memory on
integrated chips.
- VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR covers all vmid's >= 1
v3: agd5f:
- integrate with the semaphore/multi-ring stuff
v4:
- rebase on top ttm dma & multi-ring stuff
- userspace is now in charge of the address space
- no more specific cs vm ioctl, instead cs ioctl has a new
chunk
v5:
- properly handle mem == NULL case from move_notify callback
- fix the vm cleanup path
v6:
- fix update of page table to only happen on valid mem placement
v7:
- add tlb flush for each vm context
- add flags to define mapping property (readable, writeable, snooped)
- make ring id implicit from ib->fence->ring, up to each asic callback
to then do ring specific scheduling if vm ib scheduling function
v8:
- add query for ib limit and kernel reserved virtual space
- rename vm->size to max_pfn (maximum number of page)
- update gem_va ioctl to also allow unmap operation
- bump kernel version to allow userspace to query for vm support
v9:
- rebuild page table only when bind and incrementally depending
on bo referenced by cs and that have been moved
- allow virtual address space to grow
- use sa allocator for vram page table
- return invalid when querying vm limit on non cayman GPU
- dump vm fault register on lockup
v10: agd5f:
- Move the vm schedule_ib callback to a standalone function, remove
the callback and use the existing ib_execute callback for VM IBs.
v11:
- rebase on top of latest Linus
v12: agd5f:
- remove spurious backslash
- set IB vm_id to 0 in radeon_ib_get()
v13: agd5f:
- fix handling of RADEON_CHUNK_ID_FLAGS
v14:
- fix va destruction
- fix suspend resume
- forbid bo to have several different va in same vm
v15:
- rebase
v16:
- cleanup left over of vm init/fini
v17: agd5f:
- cs checker
v18: agd5f:
- reworks the CS ioctl to better support multiple rings and
VM. Rather than adding a new chunk id for VM, just re-use the
IB chunk id and add a new flags for VM mode. Also define additional
dwords for the flags chunk id to define the what ring we want to use
(gfx, compute, uvd, etc.) and the priority.
v19:
- fix cs fini in weird case of no ib
- semi working flush fix for ni
- rebase on top of sa allocator changes
v20: agd5f:
- further CS ioctl cleanups from Christian's comments
v21: agd5f:
- integrate CS checker improvements
v22: agd5f:
- final cleanups for release, only allow VM CS on cayman
Signed-off-by: Jerome Glisse <jglisse@redhat.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
2012-01-06 11:11:05 +08:00
|
|
|
struct list_head lru_vm;
|
2012-08-09 22:21:08 +08:00
|
|
|
struct radeon_fence *active[RADEON_NUM_VM];
|
drm/radeon: GPU virtual memory support v22
Virtual address space are per drm client (opener of /dev/drm).
Client are in charge of virtual address space, they need to
map bo into it by calling DRM_RADEON_GEM_VA ioctl.
First 16M of virtual address space is reserved by the kernel.
Once using 2 level page table we should be able to have a small
vram memory footprint for each pt (there would be one pt for all
gart, one for all vram and then one first level for each virtual
address space).
Plan include using the sub allocator for a common vm page table
area and using memcpy to copy vm page table in & out. Or use
a gart object and copy things in & out using dma.
v2: agd5f fixes:
- Add vram base offset for vram pages. The GPU physical address of a
vram page is FB_OFFSET + page offset. FB_OFFSET is 0 on discrete
cards and the physical bus address of the stolen memory on
integrated chips.
- VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR covers all vmid's >= 1
v3: agd5f:
- integrate with the semaphore/multi-ring stuff
v4:
- rebase on top ttm dma & multi-ring stuff
- userspace is now in charge of the address space
- no more specific cs vm ioctl, instead cs ioctl has a new
chunk
v5:
- properly handle mem == NULL case from move_notify callback
- fix the vm cleanup path
v6:
- fix update of page table to only happen on valid mem placement
v7:
- add tlb flush for each vm context
- add flags to define mapping property (readable, writeable, snooped)
- make ring id implicit from ib->fence->ring, up to each asic callback
to then do ring specific scheduling if vm ib scheduling function
v8:
- add query for ib limit and kernel reserved virtual space
- rename vm->size to max_pfn (maximum number of page)
- update gem_va ioctl to also allow unmap operation
- bump kernel version to allow userspace to query for vm support
v9:
- rebuild page table only when bind and incrementally depending
on bo referenced by cs and that have been moved
- allow virtual address space to grow
- use sa allocator for vram page table
- return invalid when querying vm limit on non cayman GPU
- dump vm fault register on lockup
v10: agd5f:
- Move the vm schedule_ib callback to a standalone function, remove
the callback and use the existing ib_execute callback for VM IBs.
v11:
- rebase on top of latest Linus
v12: agd5f:
- remove spurious backslash
- set IB vm_id to 0 in radeon_ib_get()
v13: agd5f:
- fix handling of RADEON_CHUNK_ID_FLAGS
v14:
- fix va destruction
- fix suspend resume
- forbid bo to have several different va in same vm
v15:
- rebase
v16:
- cleanup left over of vm init/fini
v17: agd5f:
- cs checker
v18: agd5f:
- reworks the CS ioctl to better support multiple rings and
VM. Rather than adding a new chunk id for VM, just re-use the
IB chunk id and add a new flags for VM mode. Also define additional
dwords for the flags chunk id to define the what ring we want to use
(gfx, compute, uvd, etc.) and the priority.
v19:
- fix cs fini in weird case of no ib
- semi working flush fix for ni
- rebase on top of sa allocator changes
v20: agd5f:
- further CS ioctl cleanups from Christian's comments
v21: agd5f:
- integrate CS checker improvements
v22: agd5f:
- final cleanups for release, only allow VM CS on cayman
Signed-off-by: Jerome Glisse <jglisse@redhat.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
2012-01-06 11:11:05 +08:00
|
|
|
struct radeon_sa_manager sa_manager;
|
|
|
|
uint32_t max_pfn;
|
|
|
|
/* number of VMIDs */
|
|
|
|
unsigned nvm;
|
|
|
|
/* vram base address for page table entry */
|
|
|
|
u64 vram_base_offset;
|
2012-01-06 22:38:15 +08:00
|
|
|
/* is vm enabled? */
|
|
|
|
bool enabled;
|
drm/radeon: GPU virtual memory support v22
Virtual address space are per drm client (opener of /dev/drm).
Client are in charge of virtual address space, they need to
map bo into it by calling DRM_RADEON_GEM_VA ioctl.
First 16M of virtual address space is reserved by the kernel.
Once using 2 level page table we should be able to have a small
vram memory footprint for each pt (there would be one pt for all
gart, one for all vram and then one first level for each virtual
address space).
Plan include using the sub allocator for a common vm page table
area and using memcpy to copy vm page table in & out. Or use
a gart object and copy things in & out using dma.
v2: agd5f fixes:
- Add vram base offset for vram pages. The GPU physical address of a
vram page is FB_OFFSET + page offset. FB_OFFSET is 0 on discrete
cards and the physical bus address of the stolen memory on
integrated chips.
- VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR covers all vmid's >= 1
v3: agd5f:
- integrate with the semaphore/multi-ring stuff
v4:
- rebase on top ttm dma & multi-ring stuff
- userspace is now in charge of the address space
- no more specific cs vm ioctl, instead cs ioctl has a new
chunk
v5:
- properly handle mem == NULL case from move_notify callback
- fix the vm cleanup path
v6:
- fix update of page table to only happen on valid mem placement
v7:
- add tlb flush for each vm context
- add flags to define mapping property (readable, writeable, snooped)
- make ring id implicit from ib->fence->ring, up to each asic callback
to then do ring specific scheduling if vm ib scheduling function
v8:
- add query for ib limit and kernel reserved virtual space
- rename vm->size to max_pfn (maximum number of page)
- update gem_va ioctl to also allow unmap operation
- bump kernel version to allow userspace to query for vm support
v9:
- rebuild page table only when bind and incrementally depending
on bo referenced by cs and that have been moved
- allow virtual address space to grow
- use sa allocator for vram page table
- return invalid when querying vm limit on non cayman GPU
- dump vm fault register on lockup
v10: agd5f:
- Move the vm schedule_ib callback to a standalone function, remove
the callback and use the existing ib_execute callback for VM IBs.
v11:
- rebase on top of latest Linus
v12: agd5f:
- remove spurious backslash
- set IB vm_id to 0 in radeon_ib_get()
v13: agd5f:
- fix handling of RADEON_CHUNK_ID_FLAGS
v14:
- fix va destruction
- fix suspend resume
- forbid bo to have several different va in same vm
v15:
- rebase
v16:
- cleanup left over of vm init/fini
v17: agd5f:
- cs checker
v18: agd5f:
- reworks the CS ioctl to better support multiple rings and
VM. Rather than adding a new chunk id for VM, just re-use the
IB chunk id and add a new flags for VM mode. Also define additional
dwords for the flags chunk id to define the what ring we want to use
(gfx, compute, uvd, etc.) and the priority.
v19:
- fix cs fini in weird case of no ib
- semi working flush fix for ni
- rebase on top of sa allocator changes
v20: agd5f:
- further CS ioctl cleanups from Christian's comments
v21: agd5f:
- integrate CS checker improvements
v22: agd5f:
- final cleanups for release, only allow VM CS on cayman
Signed-off-by: Jerome Glisse <jglisse@redhat.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
2012-01-06 11:11:05 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
/*
|
|
|
|
* file private structure
|
|
|
|
*/
|
|
|
|
/* File private data; at this point it holds a single radeon_vm,
 * embedded by value. */
struct radeon_fpriv {
|
|
|
|
/* VM state stored inline in the file private structure */
struct radeon_vm vm;
|
|
|
|
};
|
|
|
|
|
2009-12-02 02:43:46 +08:00
|
|
|
/*
|
|
|
|
* R6xx+ IH ring
|
|
|
|
*/
|
|
|
|
/*
 * State kept for the R6xx+ IH ring.
 * NOTE(review): the field roles below are read off the names and types
 * only; this header does not show how they are used -- confirm against
 * the IH code before relying on them.
 */
struct r600_ih {
|
2009-11-20 21:29:23 +08:00
|
|
|
/* buffer object for the ring (presumably its backing storage) */
struct radeon_bo *ring_obj;
|
2009-12-02 02:43:46 +08:00
|
|
|
/* ring viewed as 32-bit words; volatile, so accesses are not cached
 * or elided by the compiler */
volatile uint32_t *ring;
|
|
|
|
/* read pointer (units not visible here) */
unsigned rptr;
|
|
|
|
/* size of the ring (bytes vs. dwords not visible here) */
unsigned ring_size;
|
|
|
|
/* 64-bit GPU address, presumably of ring_obj -- confirm */
uint64_t gpu_addr;
|
|
|
|
/* mask for ring pointers, presumably used for wrap-around -- confirm */
uint32_t ptr_mask;
|
2012-05-17 03:45:24 +08:00
|
|
|
/* an atomic_t, not a mutex or spinlock; the locking protocol is not
 * visible in this header */
atomic_t lock;
|
2009-12-02 02:43:46 +08:00
|
|
|
/* enable flag for the IH ring */
bool enabled;
|
|
|
|
};
|
|
|
|
|
2011-10-13 11:29:40 +08:00
|
|
|
/*
 * Table of blit callbacks. Every entry returns void and takes the
 * radeon_device as its first argument. An instance is embedded in
 * struct r600_blit as the 'primitives' member.
 * NOTE(review): the parameter meanings are taken from their names only;
 * the implementations are not visible here.
 */
struct r600_blit_cp_primitives {
|
|
|
|
/* takes a format, w/h dimensions and a 64-bit GPU address */
void (*set_render_target)(struct radeon_device *rdev, int format,
|
|
|
|
int w, int h, u64 gpu_addr);
|
|
|
|
/* takes a sync type, a size and a 64-bit mc address */
void (*cp_set_surface_sync)(struct radeon_device *rdev,
|
|
|
|
u32 sync_type, u32 size,
|
|
|
|
u64 mc_addr);
|
|
|
|
void (*set_shaders)(struct radeon_device *rdev);
|
|
|
|
void (*set_vtx_resource)(struct radeon_device *rdev, u64 gpu_addr);
|
|
|
|
/* like set_render_target but additionally takes pitch and size */
void (*set_tex_resource)(struct radeon_device *rdev,
|
|
|
|
int format, int w, int h, int pitch,
|
2011-10-22 22:07:09 +08:00
|
|
|
u64 gpu_addr, u32 size);
|
2011-10-13 11:29:40 +08:00
|
|
|
/* takes two corner points (x1, y1) and (x2, y2) */
void (*set_scissors)(struct radeon_device *rdev, int x1, int y1,
|
|
|
|
int x2, int y2);
|
|
|
|
void (*draw_auto)(struct radeon_device *rdev);
|
|
|
|
void (*set_default_state)(struct radeon_device *rdev);
|
|
|
|
};
|
|
|
|
|
2009-09-08 08:10:24 +08:00
|
|
|
/*
 * Blit state: a shader buffer object with its GPU address, the callback
 * table, and a handful of sizes/offsets.
 * NOTE(review): the per-field notes are inferred from names and types;
 * confirm against the blit code.
 */
struct r600_blit {
|
2009-11-20 21:29:23 +08:00
|
|
|
/* buffer object for the shaders; see shader_gpu_addr below */
struct radeon_bo *shader_obj;
|
2011-10-13 11:29:40 +08:00
|
|
|
/* callback table, embedded by value */
struct r600_blit_cp_primitives primitives;
|
|
|
|
/* presumably the largest supported blit dimension -- confirm */
int max_dim;
|
|
|
|
/* presumably ring space needed once per blit and per loop iteration
 * respectively (units not visible here) -- confirm */
int ring_size_common;
|
|
|
|
int ring_size_per_loop;
|
2009-09-08 08:10:24 +08:00
|
|
|
/* 64-bit GPU address, presumably of shader_obj -- confirm */
u64 shader_gpu_addr;
|
|
|
|
/* presumably offsets of the vertex and pixel shader -- confirm */
u32 vs_offset, ps_offset;
|
|
|
|
/* offset and length of a state block (units not visible here) */
u32 state_offset;
|
|
|
|
u32 state_len;
|
|
|
|
};
|
|
|
|
|
2012-03-21 05:18:21 +08:00
|
|
|
/*
|
|
|
|
* SI RLC stuff
|
|
|
|
*/
|
|
|
|
/* SI RLC state: two buffer objects, each paired with a 64-bit GPU
 * address field. */
struct si_rlc {
|
|
|
|
/* for power gating */
|
|
|
|
struct radeon_bo *save_restore_obj;
|
|
|
|
/* GPU address, presumably of save_restore_obj -- confirm */
uint64_t save_restore_gpu_addr;
|
|
|
|
/* for clear state */
|
|
|
|
struct radeon_bo *clear_state_obj;
|
|
|
|
/* GPU address, presumably of clear_state_obj -- confirm */
uint64_t clear_state_gpu_addr;
|
|
|
|
};
|
|
|
|
|
2011-12-22 01:13:46 +08:00
|
|
|
/*
 * radeon_ib_* prototypes. Only the signatures are visible in this header;
 * the int-returning functions presumably report an error code (0 on
 * success) -- confirm against the definitions.
 */
/* fills in @ib for ring index @ring, given a VM and a size (units of
 * @size not visible here) */
int radeon_ib_get(struct radeon_device *rdev, int ring,
|
2012-08-07 00:57:44 +08:00
|
|
|
struct radeon_ib *ib, struct radeon_vm *vm,
|
|
|
|
unsigned size);
|
2012-05-09 21:35:02 +08:00
|
|
|
void radeon_ib_free(struct radeon_device *rdev, struct radeon_ib *ib);
|
2012-07-13 19:06:00 +08:00
|
|
|
/* takes a second IB, @const_ib, alongside @ib; whether it may be NULL is
 * not visible here */
int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib,
|
|
|
|
struct radeon_ib *const_ib);
|
2009-06-05 20:42:42 +08:00
|
|
|
/* pool setup / teardown pair */
int radeon_ib_pool_init(struct radeon_device *rdev);
|
|
|
|
void radeon_ib_pool_fini(struct radeon_device *rdev);
|
2012-05-02 21:11:12 +08:00
|
|
|
int radeon_ib_ring_tests(struct radeon_device *rdev);
|
2009-06-05 20:42:42 +08:00
|
|
|
/* Ring access between begin & end cannot sleep */
/*
 * NOTE(review): the struct radeon_ring * parameter is named 'cp' in the
 * older prototypes below and 'ring' in the newer ones; it is the same
 * type in both cases.
 */
|
2012-07-18 02:02:31 +08:00
|
|
|
bool radeon_ring_supports_scratch_reg(struct radeon_device *rdev,
|
|
|
|
struct radeon_ring *ring);
|
2011-10-23 18:56:27 +08:00
|
|
|
void radeon_ring_free_size(struct radeon_device *rdev, struct radeon_ring *cp);
|
|
|
|
/* @ndw: presumably a count of dwords -- confirm */
int radeon_ring_alloc(struct radeon_device *rdev, struct radeon_ring *cp, unsigned ndw);
|
|
|
|
int radeon_ring_lock(struct radeon_device *rdev, struct radeon_ring *cp, unsigned ndw);
|
|
|
|
void radeon_ring_commit(struct radeon_device *rdev, struct radeon_ring *cp);
|
|
|
|
void radeon_ring_unlock_commit(struct radeon_device *rdev, struct radeon_ring *cp);
|
2012-05-09 21:34:45 +08:00
|
|
|
/* unlike its neighbours, takes only the ring and no radeon_device */
void radeon_ring_undo(struct radeon_ring *ring);
|
2011-10-23 18:56:27 +08:00
|
|
|
void radeon_ring_unlock_undo(struct radeon_device *rdev, struct radeon_ring *cp);
|
|
|
|
int radeon_ring_test(struct radeon_device *rdev, struct radeon_ring *cp);
|
2012-05-02 21:11:23 +08:00
|
|
|
void radeon_ring_force_activity(struct radeon_device *rdev, struct radeon_ring *ring);
|
2012-05-02 21:11:20 +08:00
|
|
|
/* takes only the ring and no radeon_device */
void radeon_ring_lockup_update(struct radeon_ring *ring);
|
|
|
|
bool radeon_ring_test_lockup(struct radeon_device *rdev, struct radeon_ring *ring);
|
2012-07-09 17:52:44 +08:00
|
|
|
/* returns an unsigned count and hands a buffer back through @data;
 * presumably the (size, data) pair later given to radeon_ring_restore()
 * -- confirm, including who frees the buffer */
unsigned radeon_ring_backup(struct radeon_device *rdev, struct radeon_ring *ring,
|
|
|
|
uint32_t **data);
|
|
|
|
int radeon_ring_restore(struct radeon_device *rdev, struct radeon_ring *ring,
|
|
|
|
unsigned size, uint32_t *data);
|
2011-10-23 18:56:27 +08:00
|
|
|
/* takes the ring size, read-pointer offset, read/write pointer
 * registers, the pointer register shift and mask, and a nop value
 * (meanings read off the parameter names) */
int radeon_ring_init(struct radeon_device *rdev, struct radeon_ring *cp, unsigned ring_size,
|
2011-11-18 03:25:56 +08:00
|
|
|
unsigned rptr_offs, unsigned rptr_reg, unsigned wptr_reg,
|
|
|
|
u32 ptr_reg_shift, u32 ptr_reg_mask, u32 nop);
|
2011-10-23 18:56:27 +08:00
|
|
|
void radeon_ring_fini(struct radeon_device *rdev, struct radeon_ring *cp);
|
2009-06-05 20:42:42 +08:00
|
|
|
|
|
|
|
|
2012-09-28 03:08:35 +08:00
|
|
|
/* r600 async dma */
|
|
|
|
void r600_dma_stop(struct radeon_device *rdev);
|
|
|
|
int r600_dma_resume(struct radeon_device *rdev);
|
|
|
|
void r600_dma_fini(struct radeon_device *rdev);
|
|
|
|
|
2012-12-05 04:28:18 +08:00
|
|
|
/* cayman dma entry points; same stop/resume/fini signatures as the r600
 * ones above */
void cayman_dma_stop(struct radeon_device *rdev);
|
|
|
|
int cayman_dma_resume(struct radeon_device *rdev);
|
|
|
|
void cayman_dma_fini(struct radeon_device *rdev);
|
|
|
|
|
2009-06-05 20:42:42 +08:00
|
|
|
/*
|
|
|
|
* CS.
|
|
|
|
*/
|
|
|
|
/*
 * One CS relocation record.
 * NOTE(review): the field roles are read off the names and types only;
 * confirm against the CS code.
 */
struct radeon_cs_reloc {
|
|
|
|
/* GEM object pointer */
struct drm_gem_object *gobj;
|
2009-11-20 21:29:23 +08:00
|
|
|
/* radeon buffer object pointer, presumably the one behind gobj */
struct radeon_bo *robj;
|
|
|
|
/* radeon_bo_list entry, embedded by value */
struct radeon_bo_list lobj;
|
2009-06-05 20:42:42 +08:00
|
|
|
/* 32-bit handle, presumably the userspace handle for gobj -- confirm */
uint32_t handle;
|
|
|
|
uint32_t flags;
|
|
|
|
};
|
|
|
|
|
|
|
|
struct radeon_cs_chunk {
|
|
|
|
uint32_t chunk_id;
|
|
|
|
uint32_t length_dw;
|
drm/radeon: GPU virtual memory support v22
Virtual address space are per drm client (opener of /dev/drm).
Client are in charge of virtual address space, they need to
map bo into it by calling DRM_RADEON_GEM_VA ioctl.
First 16M of virtual address space is reserved by the kernel.
Once using 2 level page table we should be able to have a small
vram memory footprint for each pt (there would be one pt for all
gart, one for all vram and then one first level for each virtual
address space).
Plan include using the sub allocator for a common vm page table
area and using memcpy to copy vm page table in & out. Or use
a gart object and copy things in & out using dma.
v2: agd5f fixes:
- Add vram base offset for vram pages. The GPU physical address of a
vram page is FB_OFFSET + page offset. FB_OFFSET is 0 on discrete
cards and the physical bus address of the stolen memory on
integrated chips.
- VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR covers all vmid's >= 1
v3: agd5f:
- integrate with the semaphore/multi-ring stuff
v4:
- rebase on top ttm dma & multi-ring stuff
- userspace is now in charge of the address space
- no more specific cs vm ioctl, instead cs ioctl has a new
chunk
v5:
- properly handle mem == NULL case from move_notify callback
- fix the vm cleanup path
v6:
- fix update of page table to only happen on valid mem placement
v7:
- add tlb flush for each vm context
- add flags to define mapping property (readable, writeable, snooped)
- make ring id implicit from ib->fence->ring, up to each asic callback
to then do ring specific scheduling if vm ib scheduling function
v8:
- add query for ib limit and kernel reserved virtual space
- rename vm->size to max_pfn (maximum number of page)
- update gem_va ioctl to also allow unmap operation
- bump kernel version to allow userspace to query for vm support
v9:
- rebuild page table only when bind and incrementally depending
on bo referenced by cs and that have been moved
- allow virtual address space to grow
- use sa allocator for vram page table
- return invalid when querying vm limit on non cayman GPU
- dump vm fault register on lockup
v10: agd5f:
- Move the vm schedule_ib callback to a standalone function, remove
the callback and use the existing ib_execute callback for VM IBs.
v11:
- rebase on top of latest Linus
v12: agd5f:
- remove spurious backslash
- set IB vm_id to 0 in radeon_ib_get()
v13: agd5f:
- fix handling of RADEON_CHUNK_ID_FLAGS
v14:
- fix va destruction
- fix suspend resume
- forbid bo to have several different va in same vm
v15:
- rebase
v16:
- cleanup left over of vm init/fini
v17: agd5f:
- cs checker
v18: agd5f:
- reworks the CS ioctl to better support multiple rings and
VM. Rather than adding a new chunk id for VM, just re-use the
IB chunk id and add a new flags for VM mode. Also define additional
dwords for the flags chunk id to define the what ring we want to use
(gfx, compute, uvd, etc.) and the priority.
v19:
- fix cs fini in weird case of no ib
- semi working flush fix for ni
- rebase on top of sa allocator changes
v20: agd5f:
- further CS ioctl cleanups from Christian's comments
v21: agd5f:
- integrate CS checker improvements
v22: agd5f:
- final cleanups for release, only allow VM CS on cayman
Signed-off-by: Jerome Glisse <jglisse@redhat.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
2012-01-06 11:11:05 +08:00
|
|
|
int kpage_idx[2];
|
|
|
|
uint32_t *kpage[2];
|
2009-06-05 20:42:42 +08:00
|
|
|
uint32_t *kdata;
|
drm/radeon: GPU virtual memory support v22
Virtual address space are per drm client (opener of /dev/drm).
Client are in charge of virtual address space, they need to
map bo into it by calling DRM_RADEON_GEM_VA ioctl.
First 16M of virtual address space is reserved by the kernel.
Once using 2 level page table we should be able to have a small
vram memory footprint for each pt (there would be one pt for all
gart, one for all vram and then one first level for each virtual
address space).
Plan include using the sub allocator for a common vm page table
area and using memcpy to copy vm page table in & out. Or use
a gart object and copy things in & out using dma.
v2: agd5f fixes:
- Add vram base offset for vram pages. The GPU physical address of a
vram page is FB_OFFSET + page offset. FB_OFFSET is 0 on discrete
cards and the physical bus address of the stolen memory on
integrated chips.
- VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR covers all vmid's >= 1
v3: agd5f:
- integrate with the semaphore/multi-ring stuff
v4:
- rebase on top ttm dma & multi-ring stuff
- userspace is now in charge of the address space
- no more specific cs vm ioctl, instead cs ioctl has a new
chunk
v5:
- properly handle mem == NULL case from move_notify callback
- fix the vm cleanup path
v6:
- fix update of page table to only happen on valid mem placement
v7:
- add tlb flush for each vm context
- add flags to define mapping property (readable, writeable, snooped)
- make ring id implicit from ib->fence->ring, up to each asic callback
to then do ring specific scheduling if vm ib scheduling function
v8:
- add query for ib limit and kernel reserved virtual space
- rename vm->size to max_pfn (maximum number of page)
- update gem_va ioctl to also allow unmap operation
- bump kernel version to allow userspace to query for vm support
v9:
- rebuild page table only when bind and incrementaly depending
on bo referenced by cs and that have been moved
- allow virtual address space to grow
- use sa allocator for vram page table
- return invalid when querying vm limit on non cayman GPU
- dump vm fault register on lockup
v10: agd5f:
- Move the vm schedule_ib callback to a standalone function, remove
the callback and use the existing ib_execute callback for VM IBs.
v11:
- rebase on top of lastest Linus
v12: agd5f:
- remove spurious backslash
- set IB vm_id to 0 in radeon_ib_get()
v13: agd5f:
- fix handling of RADEON_CHUNK_ID_FLAGS
v14:
- fix va destruction
- fix suspend resume
- forbid bo to have several different va in same vm
v15:
- rebase
v16:
- cleanup left over of vm init/fini
v17: agd5f:
- cs checker
v18: agd5f:
- reworks the CS ioctl to better support multiple rings and
VM. Rather than adding a new chunk id for VM, just re-use the
IB chunk id and add a new flags for VM mode. Also define additional
dwords for the flags chunk id to define the what ring we want to use
(gfx, compute, uvd, etc.) and the priority.
v19:
- fix cs fini in weird case of no ib
- semi working flush fix for ni
- rebase on top of sa allocator changes
v20: agd5f:
- further CS ioctl cleanups from Christian's comments
v21: agd5f:
- integrate CS checker improvements
v22: agd5f:
- final cleanups for release, only allow VM CS on cayman
Signed-off-by: Jerome Glisse <jglisse@redhat.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
2012-01-06 11:11:05 +08:00
|
|
|
void __user *user_ptr;
|
|
|
|
int last_copied_page;
|
|
|
|
int last_page_index;
|
2009-06-05 20:42:42 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
struct radeon_cs_parser {
|
2010-01-18 20:01:36 +08:00
|
|
|
struct device *dev;
|
2009-06-05 20:42:42 +08:00
|
|
|
struct radeon_device *rdev;
|
|
|
|
struct drm_file *filp;
|
|
|
|
/* chunks */
|
|
|
|
unsigned nchunks;
|
|
|
|
struct radeon_cs_chunk *chunks;
|
|
|
|
uint64_t *chunks_array;
|
|
|
|
/* IB */
|
|
|
|
unsigned idx;
|
|
|
|
/* relocations */
|
|
|
|
unsigned nrelocs;
|
|
|
|
struct radeon_cs_reloc *relocs;
|
|
|
|
struct radeon_cs_reloc **relocs_ptr;
|
|
|
|
struct list_head validated;
|
2011-11-18 23:19:47 +08:00
|
|
|
unsigned dma_reloc_idx;
|
2009-06-05 20:42:42 +08:00
|
|
|
/* indices of various chunks */
|
|
|
|
int chunk_ib_idx;
|
|
|
|
int chunk_relocs_idx;
|
drm/radeon: GPU virtual memory support v22
Virtual address space are per drm client (opener of /dev/drm).
Client are in charge of virtual address space, they need to
map bo into it by calling DRM_RADEON_GEM_VA ioctl.
First 16M of virtual address space is reserved by the kernel.
Once using 2 level page table we should be able to have a small
vram memory footprint for each pt (there would be one pt for all
gart, one for all vram and then one first level for each virtual
address space).
Plan include using the sub allocator for a common vm page table
area and using memcpy to copy vm page table in & out. Or use
a gart object and copy things in & out using dma.
v2: agd5f fixes:
- Add vram base offset for vram pages. The GPU physical address of a
vram page is FB_OFFSET + page offset. FB_OFFSET is 0 on discrete
cards and the physical bus address of the stolen memory on
integrated chips.
- VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR covers all vmid's >= 1
v3: agd5f:
- integrate with the semaphore/multi-ring stuff
v4:
- rebase on top ttm dma & multi-ring stuff
- userspace is now in charge of the address space
- no more specific cs vm ioctl, instead cs ioctl has a new
chunk
v5:
- properly handle mem == NULL case from move_notify callback
- fix the vm cleanup path
v6:
- fix update of page table to only happen on valid mem placement
v7:
- add tlb flush for each vm context
- add flags to define mapping property (readable, writeable, snooped)
- make ring id implicit from ib->fence->ring, up to each asic callback
to then do ring specific scheduling if vm ib scheduling function
v8:
- add query for ib limit and kernel reserved virtual space
- rename vm->size to max_pfn (maximum number of page)
- update gem_va ioctl to also allow unmap operation
- bump kernel version to allow userspace to query for vm support
v9:
- rebuild page table only when bind and incrementaly depending
on bo referenced by cs and that have been moved
- allow virtual address space to grow
- use sa allocator for vram page table
- return invalid when querying vm limit on non cayman GPU
- dump vm fault register on lockup
v10: agd5f:
- Move the vm schedule_ib callback to a standalone function, remove
the callback and use the existing ib_execute callback for VM IBs.
v11:
- rebase on top of lastest Linus
v12: agd5f:
- remove spurious backslash
- set IB vm_id to 0 in radeon_ib_get()
v13: agd5f:
- fix handling of RADEON_CHUNK_ID_FLAGS
v14:
- fix va destruction
- fix suspend resume
- forbid bo to have several different va in same vm
v15:
- rebase
v16:
- cleanup left over of vm init/fini
v17: agd5f:
- cs checker
v18: agd5f:
- reworks the CS ioctl to better support multiple rings and
VM. Rather than adding a new chunk id for VM, just re-use the
IB chunk id and add a new flags for VM mode. Also define additional
dwords for the flags chunk id to define the what ring we want to use
(gfx, compute, uvd, etc.) and the priority.
v19:
- fix cs fini in weird case of no ib
- semi working flush fix for ni
- rebase on top of sa allocator changes
v20: agd5f:
- further CS ioctl cleanups from Christian's comments
v21: agd5f:
- integrate CS checker improvements
v22: agd5f:
- final cleanups for release, only allow VM CS on cayman
Signed-off-by: Jerome Glisse <jglisse@redhat.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
2012-01-06 11:11:05 +08:00
|
|
|
int chunk_flags_idx;
|
2012-03-21 05:18:14 +08:00
|
|
|
int chunk_const_ib_idx;
|
2012-05-09 21:35:02 +08:00
|
|
|
struct radeon_ib ib;
|
|
|
|
struct radeon_ib const_ib;
|
2009-06-05 20:42:42 +08:00
|
|
|
void *track;
|
2009-09-08 08:10:24 +08:00
|
|
|
unsigned family;
|
2011-10-25 07:38:45 +08:00
|
|
|
int parser_error;
|
drm/radeon: GPU virtual memory support v22
Virtual address space are per drm client (opener of /dev/drm).
Client are in charge of virtual address space, they need to
map bo into it by calling DRM_RADEON_GEM_VA ioctl.
First 16M of virtual address space is reserved by the kernel.
Once using 2 level page table we should be able to have a small
vram memory footprint for each pt (there would be one pt for all
gart, one for all vram and then one first level for each virtual
address space).
Plan include using the sub allocator for a common vm page table
area and using memcpy to copy vm page table in & out. Or use
a gart object and copy things in & out using dma.
v2: agd5f fixes:
- Add vram base offset for vram pages. The GPU physical address of a
vram page is FB_OFFSET + page offset. FB_OFFSET is 0 on discrete
cards and the physical bus address of the stolen memory on
integrated chips.
- VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR covers all vmid's >= 1
v3: agd5f:
- integrate with the semaphore/multi-ring stuff
v4:
- rebase on top ttm dma & multi-ring stuff
- userspace is now in charge of the address space
- no more specific cs vm ioctl, instead cs ioctl has a new
chunk
v5:
- properly handle mem == NULL case from move_notify callback
- fix the vm cleanup path
v6:
- fix update of page table to only happen on valid mem placement
v7:
- add tlb flush for each vm context
- add flags to define mapping property (readable, writeable, snooped)
- make ring id implicit from ib->fence->ring, up to each asic callback
to then do ring specific scheduling if vm ib scheduling function
v8:
- add query for ib limit and kernel reserved virtual space
- rename vm->size to max_pfn (maximum number of page)
- update gem_va ioctl to also allow unmap operation
- bump kernel version to allow userspace to query for vm support
v9:
- rebuild page table only when bind and incrementaly depending
on bo referenced by cs and that have been moved
- allow virtual address space to grow
- use sa allocator for vram page table
- return invalid when querying vm limit on non cayman GPU
- dump vm fault register on lockup
v10: agd5f:
- Move the vm schedule_ib callback to a standalone function, remove
the callback and use the existing ib_execute callback for VM IBs.
v11:
- rebase on top of lastest Linus
v12: agd5f:
- remove spurious backslash
- set IB vm_id to 0 in radeon_ib_get()
v13: agd5f:
- fix handling of RADEON_CHUNK_ID_FLAGS
v14:
- fix va destruction
- fix suspend resume
- forbid bo to have several different va in same vm
v15:
- rebase
v16:
- cleanup left over of vm init/fini
v17: agd5f:
- cs checker
v18: agd5f:
- reworks the CS ioctl to better support multiple rings and
VM. Rather than adding a new chunk id for VM, just re-use the
IB chunk id and add a new flags for VM mode. Also define additional
dwords for the flags chunk id to define the what ring we want to use
(gfx, compute, uvd, etc.) and the priority.
v19:
- fix cs fini in weird case of no ib
- semi working flush fix for ni
- rebase on top of sa allocator changes
v20: agd5f:
- further CS ioctl cleanups from Christian's comments
v21: agd5f:
- integrate CS checker improvements
v22: agd5f:
- final cleanups for release, only allow VM CS on cayman
Signed-off-by: Jerome Glisse <jglisse@redhat.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
2012-01-06 11:11:05 +08:00
|
|
|
u32 cs_flags;
|
|
|
|
u32 ring;
|
|
|
|
s32 priority;
|
2009-06-05 20:42:42 +08:00
|
|
|
};
|
|
|
|
|
2009-09-23 14:56:27 +08:00
|
|
|
extern int radeon_cs_finish_pages(struct radeon_cs_parser *p);
|
2011-10-14 07:08:47 +08:00
|
|
|
extern u32 radeon_get_ib_value(struct radeon_cs_parser *p, int idx);
|
2009-09-23 14:56:27 +08:00
|
|
|
|
2009-06-05 20:42:42 +08:00
|
|
|
struct radeon_cs_packet {
|
|
|
|
unsigned idx;
|
|
|
|
unsigned type;
|
|
|
|
unsigned reg;
|
|
|
|
unsigned opcode;
|
|
|
|
int count;
|
|
|
|
unsigned one_reg_wr;
|
|
|
|
};
|
|
|
|
|
|
|
|
typedef int (*radeon_packet0_check_t)(struct radeon_cs_parser *p,
|
|
|
|
struct radeon_cs_packet *pkt,
|
|
|
|
unsigned idx, unsigned reg);
|
|
|
|
typedef int (*radeon_packet3_check_t)(struct radeon_cs_parser *p,
|
|
|
|
struct radeon_cs_packet *pkt);
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
* AGP
|
|
|
|
*/
|
|
|
|
int radeon_agp_init(struct radeon_device *rdev);
|
2009-11-05 13:39:10 +08:00
|
|
|
void radeon_agp_resume(struct radeon_device *rdev);
|
2010-05-22 00:48:54 +08:00
|
|
|
void radeon_agp_suspend(struct radeon_device *rdev);
|
2009-06-05 20:42:42 +08:00
|
|
|
void radeon_agp_fini(struct radeon_device *rdev);
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Writeback
|
|
|
|
*/
|
|
|
|
struct radeon_wb {
|
2009-11-20 21:29:23 +08:00
|
|
|
struct radeon_bo *wb_obj;
|
2009-06-05 20:42:42 +08:00
|
|
|
volatile uint32_t *wb;
|
|
|
|
uint64_t gpu_addr;
|
2010-08-28 06:25:25 +08:00
|
|
|
bool enabled;
|
2010-09-04 17:04:34 +08:00
|
|
|
bool use_event;
|
2009-06-05 20:42:42 +08:00
|
|
|
};
|
|
|
|
|
2010-08-28 06:25:25 +08:00
|
|
|
#define RADEON_WB_SCRATCH_OFFSET 0
|
2012-07-18 02:02:31 +08:00
|
|
|
#define RADEON_WB_RING0_NEXT_RPTR 256
|
2010-08-28 06:25:25 +08:00
|
|
|
#define RADEON_WB_CP_RPTR_OFFSET 1024
|
2011-03-03 09:07:31 +08:00
|
|
|
#define RADEON_WB_CP1_RPTR_OFFSET 1280
|
|
|
|
#define RADEON_WB_CP2_RPTR_OFFSET 1536
|
2012-09-28 03:08:35 +08:00
|
|
|
#define R600_WB_DMA_RPTR_OFFSET 1792
|
2010-08-28 06:25:25 +08:00
|
|
|
#define R600_WB_IH_WPTR_OFFSET 2048
|
2012-12-05 04:27:33 +08:00
|
|
|
#define CAYMAN_WB_DMA1_RPTR_OFFSET 2304
|
2010-09-04 17:04:34 +08:00
|
|
|
#define R600_WB_EVENT_OFFSET 3072
|
2010-08-28 06:25:25 +08:00
|
|
|
|
2009-07-14 03:04:08 +08:00
|
|
|
/**
|
|
|
|
* struct radeon_pm - power management data
|
|
|
|
* @max_bandwidth: maximum bandwidth the gpu has (MByte/s)
|
|
|
|
* @igp_sideport_mclk: sideport memory clock MHz (rs690,rs740,rs780,rs880)
|
|
|
|
* @igp_system_mclk: system clock MHz (rs690,rs740,rs780,rs880)
|
|
|
|
* @igp_ht_link_clk: ht link clock MHz (rs690,rs740,rs780,rs880)
|
|
|
|
* @igp_ht_link_width: ht link width in bits (rs690,rs740,rs780,rs880)
|
|
|
|
* @k8_bandwidth: k8 bandwidth the gpu has (MByte/s) (IGP)
|
|
|
|
* @sideport_bandwidth: sideport bandwidth the gpu has (MByte/s) (IGP)
|
|
|
|
* @ht_bandwidth: ht bandwidth the gpu has (MByte/s) (IGP)
|
|
|
|
* @core_bandwidth: core GPU bandwidth the gpu has (MByte/s) (IGP)
|
2011-03-31 09:57:33 +08:00
|
|
|
* @sclk: GPU clock MHz (core bandwidth depends on this clock)
|
2009-07-14 03:04:08 +08:00
|
|
|
* @needed_bandwidth: current bandwidth needs
|
|
|
|
*
|
|
|
|
* It keeps track of various data needed to make power management decisions.
|
2011-03-31 09:57:33 +08:00
|
|
|
* Bandwidth need is used to determine the minimum clock of the GPU and memory.
|
2009-07-14 03:04:08 +08:00
|
|
|
* Equation between gpu/memory clock and available bandwidth is hw dependent
|
|
|
|
* (type of memory, bus size, efficiency, ...)
|
|
|
|
*/
|
2010-05-08 03:10:16 +08:00
|
|
|
|
|
|
|
enum radeon_pm_method {
|
|
|
|
PM_METHOD_PROFILE,
|
|
|
|
PM_METHOD_DYNPM,
|
|
|
|
};
|
|
|
|
|
|
|
|
enum radeon_dynpm_state {
|
|
|
|
DYNPM_STATE_DISABLED,
|
|
|
|
DYNPM_STATE_MINIMUM,
|
|
|
|
DYNPM_STATE_PAUSED,
|
DRM / radeon / KMS: Fix hibernation regression related to radeon PM (was: Re: [Regression, post-2.6.34] Hibernation broken on machines with radeon/KMS and r300)
There is a regression from 2.6.34 related to the recent radeon power
management changes, caused by attempting to cancel a delayed work
item that's never been scheduled. However, the code as is has some
other issues potentially leading to visible problems.
First, the mutex around cancel_delayed_work() in radeon_pm_suspend()
doesn't really serve any purpose, because cancel_delayed_work() only
tries to delete the work's timer. Moreover, it doesn't prevent the
work handler from running, so the handler can do some wrong things if
it wins the race and in that case it will rearm itself to do some
more wrong things going forward. So, I think it's better to wait for
the handler to return in case it's already been queued up for
execution. Also, it should be prevented from rearming itself in that
case.
Second, in radeon_set_pm_method() the cancel_delayed_work() is not
sufficient to prevent the work handler from running and queing up
itself for the next run (the failure scenario is that
cancel_delayed_work() returns 0, so the handler is run, it waits on
the mutex and then rearms itself after the mutex has been released),
so again the work handler should be prevented from rearming itself in
that case..
Finally, there's a potential deadlock in radeon_pm_fini(), because
cancel_delayed_work_sync() is called under rdev->pm.mutex, but the
work handler tries to acquire the same mutex (if it wins the race).
Fix the issues described above.
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Reviewed-by: Alex Deucher <alexdeucher@gmail.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
2010-06-18 07:02:27 +08:00
|
|
|
DYNPM_STATE_ACTIVE,
|
|
|
|
DYNPM_STATE_SUSPENDED,
|
2009-12-23 06:02:16 +08:00
|
|
|
};
|
2010-05-08 03:10:16 +08:00
|
|
|
enum radeon_dynpm_action {
|
|
|
|
DYNPM_ACTION_NONE,
|
|
|
|
DYNPM_ACTION_MINIMUM,
|
|
|
|
DYNPM_ACTION_DOWNCLOCK,
|
|
|
|
DYNPM_ACTION_UPCLOCK,
|
|
|
|
DYNPM_ACTION_DEFAULT
|
2009-12-23 06:02:16 +08:00
|
|
|
};
|
2009-12-29 02:58:44 +08:00
|
|
|
|
|
|
|
enum radeon_voltage_type {
|
|
|
|
VOLTAGE_NONE = 0,
|
|
|
|
VOLTAGE_GPIO,
|
|
|
|
VOLTAGE_VDDC,
|
|
|
|
VOLTAGE_SW
|
|
|
|
};
|
|
|
|
|
2009-12-24 02:21:58 +08:00
|
|
|
enum radeon_pm_state_type {
|
|
|
|
POWER_STATE_TYPE_DEFAULT,
|
|
|
|
POWER_STATE_TYPE_POWERSAVE,
|
|
|
|
POWER_STATE_TYPE_BATTERY,
|
|
|
|
POWER_STATE_TYPE_BALANCED,
|
|
|
|
POWER_STATE_TYPE_PERFORMANCE,
|
|
|
|
};
|
|
|
|
|
2010-05-08 03:10:16 +08:00
|
|
|
enum radeon_pm_profile_type {
|
|
|
|
PM_PROFILE_DEFAULT,
|
|
|
|
PM_PROFILE_AUTO,
|
|
|
|
PM_PROFILE_LOW,
|
2010-06-03 05:56:01 +08:00
|
|
|
PM_PROFILE_MID,
|
2010-05-08 03:10:16 +08:00
|
|
|
PM_PROFILE_HIGH,
|
|
|
|
};
|
|
|
|
|
|
|
|
#define PM_PROFILE_DEFAULT_IDX 0
|
|
|
|
#define PM_PROFILE_LOW_SH_IDX 1
|
2010-06-03 05:56:01 +08:00
|
|
|
#define PM_PROFILE_MID_SH_IDX 2
|
|
|
|
#define PM_PROFILE_HIGH_SH_IDX 3
|
|
|
|
#define PM_PROFILE_LOW_MH_IDX 4
|
|
|
|
#define PM_PROFILE_MID_MH_IDX 5
|
|
|
|
#define PM_PROFILE_HIGH_MH_IDX 6
|
|
|
|
#define PM_PROFILE_MAX 7
|
2010-05-08 03:10:16 +08:00
|
|
|
|
|
|
|
struct radeon_pm_profile {
|
|
|
|
int dpms_off_ps_idx;
|
|
|
|
int dpms_on_ps_idx;
|
|
|
|
int dpms_off_cm_idx;
|
|
|
|
int dpms_on_cm_idx;
|
2009-12-24 03:28:05 +08:00
|
|
|
};
|
|
|
|
|
2010-07-03 00:58:16 +08:00
|
|
|
enum radeon_int_thermal_type {
|
|
|
|
THERMAL_TYPE_NONE,
|
|
|
|
THERMAL_TYPE_RV6XX,
|
|
|
|
THERMAL_TYPE_RV770,
|
|
|
|
THERMAL_TYPE_EVERGREEN,
|
2010-11-23 06:56:32 +08:00
|
|
|
THERMAL_TYPE_SUMO,
|
2011-01-07 10:19:22 +08:00
|
|
|
THERMAL_TYPE_NI,
|
2012-03-21 05:18:09 +08:00
|
|
|
THERMAL_TYPE_SI,
|
2010-07-03 00:58:16 +08:00
|
|
|
};
|
|
|
|
|
2009-12-29 02:58:44 +08:00
|
|
|
struct radeon_voltage {
|
|
|
|
enum radeon_voltage_type type;
|
|
|
|
/* gpio voltage */
|
|
|
|
struct radeon_gpio_rec gpio;
|
|
|
|
u32 delay; /* delay in usec from voltage drop to sclk change */
|
|
|
|
bool active_high; /* voltage drop is active when bit is high */
|
|
|
|
/* VDDC voltage */
|
|
|
|
u8 vddc_id; /* index into vddc voltage table */
|
|
|
|
u8 vddci_id; /* index into vddci voltage table */
|
|
|
|
bool vddci_enabled;
|
|
|
|
/* r6xx+ sw */
|
2011-04-13 02:49:24 +08:00
|
|
|
u16 voltage;
|
|
|
|
/* evergreen+ vddci */
|
|
|
|
u16 vddci;
|
2009-12-29 02:58:44 +08:00
|
|
|
};
|
|
|
|
|
2010-05-03 13:13:14 +08:00
|
|
|
/* clock mode flags */
|
|
|
|
#define RADEON_PM_MODE_NO_DISPLAY (1 << 0)
|
|
|
|
|
2009-12-29 02:58:44 +08:00
|
|
|
struct radeon_pm_clock_info {
|
|
|
|
/* memory clock */
|
|
|
|
u32 mclk;
|
|
|
|
/* engine clock */
|
|
|
|
u32 sclk;
|
|
|
|
/* voltage info */
|
|
|
|
struct radeon_voltage voltage;
|
2010-05-03 13:13:14 +08:00
|
|
|
/* standardized clock flags */
|
2009-12-29 02:58:44 +08:00
|
|
|
u32 flags;
|
|
|
|
};
|
|
|
|
|
2010-04-23 02:03:55 +08:00
|
|
|
/* state flags */
|
2010-05-03 13:13:14 +08:00
|
|
|
#define RADEON_PM_STATE_SINGLE_DISPLAY_ONLY (1 << 0)
|
2010-04-23 02:03:55 +08:00
|
|
|
|
2009-12-29 02:58:44 +08:00
|
|
|
struct radeon_power_state {
|
2009-12-24 02:21:58 +08:00
|
|
|
enum radeon_pm_state_type type;
|
2011-11-04 22:09:43 +08:00
|
|
|
struct radeon_pm_clock_info *clock_info;
|
2009-12-29 02:58:44 +08:00
|
|
|
/* number of valid clock modes in this power state */
|
|
|
|
int num_clock_modes;
|
|
|
|
struct radeon_pm_clock_info *default_clock_mode;
|
2010-04-23 02:03:55 +08:00
|
|
|
/* standardized state flags */
|
|
|
|
u32 flags;
|
2010-04-23 02:25:19 +08:00
|
|
|
u32 misc; /* vbios specific flags */
|
|
|
|
u32 misc2; /* vbios specific flags */
|
|
|
|
int pcie_lanes; /* pcie lanes */
|
2009-12-29 02:58:44 +08:00
|
|
|
};
|
|
|
|
|
2010-02-12 06:16:36 +08:00
|
|
|
/*
|
|
|
|
* Some modes are overclocked by a very small margin; accept them
|
|
|
|
*/
|
|
|
|
#define RADEON_MODE_OVERCLOCK_MARGIN 500 /* 5 MHz */
|
|
|
|
|
2009-07-14 03:04:08 +08:00
|
|
|
struct radeon_pm {
|
2009-12-23 06:02:16 +08:00
|
|
|
struct mutex mutex;
|
2012-05-11 20:57:18 +08:00
|
|
|
/* write locked while reprogramming mclk */
|
|
|
|
struct rw_semaphore mclk_lock;
|
2010-04-23 02:03:55 +08:00
|
|
|
u32 active_crtcs;
|
|
|
|
int active_crtc_count;
|
2009-12-23 06:02:16 +08:00
|
|
|
int req_vblank;
|
2010-03-03 05:06:51 +08:00
|
|
|
bool vblank_sync;
|
2009-07-14 03:04:08 +08:00
|
|
|
fixed20_12 max_bandwidth;
|
|
|
|
fixed20_12 igp_sideport_mclk;
|
|
|
|
fixed20_12 igp_system_mclk;
|
|
|
|
fixed20_12 igp_ht_link_clk;
|
|
|
|
fixed20_12 igp_ht_link_width;
|
|
|
|
fixed20_12 k8_bandwidth;
|
|
|
|
fixed20_12 sideport_bandwidth;
|
|
|
|
fixed20_12 ht_bandwidth;
|
|
|
|
fixed20_12 core_bandwidth;
|
|
|
|
fixed20_12 sclk;
|
2010-03-17 08:54:38 +08:00
|
|
|
fixed20_12 mclk;
|
2009-07-14 03:04:08 +08:00
|
|
|
fixed20_12 needed_bandwidth;
|
2011-02-03 07:42:03 +08:00
|
|
|
struct radeon_power_state *power_state;
|
2009-12-29 02:58:44 +08:00
|
|
|
/* number of valid power states */
|
|
|
|
int num_power_states;
|
2010-04-23 02:03:55 +08:00
|
|
|
int current_power_state_index;
|
|
|
|
int current_clock_mode_index;
|
|
|
|
int requested_power_state_index;
|
|
|
|
int requested_clock_mode_index;
|
|
|
|
int default_power_state_index;
|
|
|
|
u32 current_sclk;
|
|
|
|
u32 current_mclk;
|
2011-04-13 02:49:24 +08:00
|
|
|
u16 current_vddc;
|
|
|
|
u16 current_vddci;
|
2011-01-07 10:19:26 +08:00
|
|
|
u32 default_sclk;
|
|
|
|
u32 default_mclk;
|
2011-04-13 02:49:24 +08:00
|
|
|
u16 default_vddc;
|
|
|
|
u16 default_vddci;
|
2010-03-11 23:01:17 +08:00
|
|
|
struct radeon_i2c_chan *i2c_bus;
|
2010-05-08 03:10:16 +08:00
|
|
|
/* selected pm method */
|
|
|
|
enum radeon_pm_method pm_method;
|
|
|
|
/* dynpm power management */
|
|
|
|
struct delayed_work dynpm_idle_work;
|
|
|
|
enum radeon_dynpm_state dynpm_state;
|
|
|
|
enum radeon_dynpm_action dynpm_planned_action;
|
|
|
|
unsigned long dynpm_action_timeout;
|
|
|
|
bool dynpm_can_upclock;
|
|
|
|
bool dynpm_can_downclock;
|
|
|
|
/* profile-based power management */
|
|
|
|
enum radeon_pm_profile_type profile;
|
|
|
|
int profile_index;
|
|
|
|
struct radeon_pm_profile profiles[PM_PROFILE_MAX];
|
2010-07-03 00:58:16 +08:00
|
|
|
/* internal thermal controller on rv6xx+ */
|
|
|
|
enum radeon_int_thermal_type int_thermal_type;
|
|
|
|
struct device *int_hwmon_dev;
|
2009-07-14 03:04:08 +08:00
|
|
|
};
|
|
|
|
|
2011-11-04 22:09:41 +08:00
|
|
|
int radeon_pm_get_type_index(struct radeon_device *rdev,
|
|
|
|
enum radeon_pm_state_type ps_type,
|
|
|
|
int instance);
|
2009-06-05 20:42:42 +08:00
|
|
|
|
2012-04-29 05:35:20 +08:00
|
|
|
struct r600_audio {
|
|
|
|
int channels;
|
|
|
|
int rate;
|
|
|
|
int bits_per_sample;
|
|
|
|
u8 status_bits;
|
|
|
|
u8 category_code;
|
|
|
|
};
|
|
|
|
|
2009-06-05 20:42:42 +08:00
|
|
|
/*
|
|
|
|
* Benchmarking
|
|
|
|
*/
|
2011-10-13 11:29:39 +08:00
|
|
|
void radeon_benchmark(struct radeon_device *rdev, int test_number);
|
2009-06-05 20:42:42 +08:00
|
|
|
|
|
|
|
|
2009-07-21 17:23:57 +08:00
|
|
|
/*
|
|
|
|
* Testing
|
|
|
|
*/
|
|
|
|
void radeon_test_moves(struct radeon_device *rdev);
|
2011-09-27 18:31:00 +08:00
|
|
|
void radeon_test_ring_sync(struct radeon_device *rdev,
|
2011-10-23 18:56:27 +08:00
|
|
|
struct radeon_ring *cpA,
|
|
|
|
struct radeon_ring *cpB);
|
2011-09-27 18:31:00 +08:00
|
|
|
void radeon_test_syncing(struct radeon_device *rdev);
|
2009-07-21 17:23:57 +08:00
|
|
|
|
|
|
|
|
2009-06-05 20:42:42 +08:00
|
|
|
/*
|
|
|
|
* Debugfs
|
|
|
|
*/
|
2011-10-24 20:54:54 +08:00
|
|
|
struct radeon_debugfs {
|
|
|
|
struct drm_info_list *files;
|
|
|
|
unsigned num_files;
|
|
|
|
};
|
|
|
|
|
2009-06-05 20:42:42 +08:00
|
|
|
int radeon_debugfs_add_files(struct radeon_device *rdev,
|
|
|
|
struct drm_info_list *files,
|
|
|
|
unsigned nfiles);
|
|
|
|
int radeon_debugfs_fence_init(struct radeon_device *rdev);
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
* ASIC specific functions.
|
|
|
|
*/
|
|
|
|
struct radeon_asic {
|
2009-06-17 19:28:30 +08:00
|
|
|
int (*init)(struct radeon_device *rdev);
|
2009-09-08 08:10:24 +08:00
|
|
|
void (*fini)(struct radeon_device *rdev);
|
|
|
|
int (*resume)(struct radeon_device *rdev);
|
|
|
|
int (*suspend)(struct radeon_device *rdev);
|
2009-09-21 12:33:58 +08:00
|
|
|
void (*vga_set_state)(struct radeon_device *rdev, bool state);
|
2010-03-09 22:45:11 +08:00
|
|
|
int (*asic_reset)(struct radeon_device *rdev);
|
2012-02-24 07:10:29 +08:00
|
|
|
/* ioctl hw specific callback. Some hw might want to perform special
|
|
|
|
* operation on specific ioctl. For instance on wait idle some hw
|
|
|
|
* might want to perform an HDP flush through MMIO as it seems that
|
|
|
|
* some R6XX/R7XX hw doesn't take HDP flush into account if programmed
|
|
|
|
* through ring.
|
|
|
|
*/
|
|
|
|
void (*ioctl_wait_idle)(struct radeon_device *rdev, struct radeon_bo *bo);
|
|
|
|
/* check if 3D engine is idle */
|
|
|
|
bool (*gui_idle)(struct radeon_device *rdev);
|
|
|
|
/* wait for mc_idle */
|
|
|
|
int (*mc_wait_for_idle)(struct radeon_device *rdev);
|
|
|
|
/* gart */
|
2012-02-24 06:53:46 +08:00
|
|
|
struct {
|
|
|
|
void (*tlb_flush)(struct radeon_device *rdev);
|
|
|
|
int (*set_page)(struct radeon_device *rdev, int i, uint64_t addr);
|
|
|
|
} gart;
|
2012-08-07 02:21:10 +08:00
|
|
|
struct {
|
|
|
|
int (*init)(struct radeon_device *rdev);
|
|
|
|
void (*fini)(struct radeon_device *rdev);
|
2012-08-11 21:00:30 +08:00
|
|
|
|
|
|
|
u32 pt_ring_index;
|
2012-09-18 01:36:18 +08:00
|
|
|
void (*set_page)(struct radeon_device *rdev, uint64_t pe,
|
|
|
|
uint64_t addr, unsigned count,
|
|
|
|
uint32_t incr, uint32_t flags);
|
2012-08-07 02:21:10 +08:00
|
|
|
} vm;
|
2012-02-24 07:10:29 +08:00
|
|
|
/* ring specific callbacks */
|
2011-10-20 01:02:21 +08:00
|
|
|
struct {
|
|
|
|
void (*ib_execute)(struct radeon_device *rdev, struct radeon_ib *ib);
|
drm/radeon: GPU virtual memory support v22
Virtual address space are per drm client (opener of /dev/drm).
Client are in charge of virtual address space, they need to
map bo into it by calling DRM_RADEON_GEM_VA ioctl.
First 16M of virtual address space is reserved by the kernel.
Once using 2 level page table we should be able to have a small
vram memory footprint for each pt (there would be one pt for all
gart, one for all vram and then one first level for each virtual
address space).
Plan include using the sub allocator for a common vm page table
area and using memcpy to copy vm page table in & out. Or use
a gart object and copy things in & out using dma.
v2: agd5f fixes:
- Add vram base offset for vram pages. The GPU physical address of a
vram page is FB_OFFSET + page offset. FB_OFFSET is 0 on discrete
cards and the physical bus address of the stolen memory on
integrated chips.
- VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR covers all vmid's >= 1
v3: agd5f:
- integrate with the semaphore/multi-ring stuff
v4:
- rebase on top ttm dma & multi-ring stuff
- userspace is now in charge of the address space
- no more specific cs vm ioctl, instead cs ioctl has a new
chunk
v5:
- properly handle mem == NULL case from move_notify callback
- fix the vm cleanup path
v6:
- fix update of page table to only happen on valid mem placement
v7:
- add tlb flush for each vm context
- add flags to define mapping property (readable, writeable, snooped)
- make ring id implicit from ib->fence->ring, up to each asic callback
to then do ring specific scheduling if vm ib scheduling function
v8:
- add query for ib limit and kernel reserved virtual space
- rename vm->size to max_pfn (maximum number of page)
- update gem_va ioctl to also allow unmap operation
- bump kernel version to allow userspace to query for vm support
v9:
- rebuild page table only when bind and incrementaly depending
on bo referenced by cs and that have been moved
- allow virtual address space to grow
- use sa allocator for vram page table
- return invalid when querying vm limit on non cayman GPU
- dump vm fault register on lockup
v10: agd5f:
- Move the vm schedule_ib callback to a standalone function, remove
the callback and use the existing ib_execute callback for VM IBs.
v11:
- rebase on top of lastest Linus
v12: agd5f:
- remove spurious backslash
- set IB vm_id to 0 in radeon_ib_get()
v13: agd5f:
- fix handling of RADEON_CHUNK_ID_FLAGS
v14:
- fix va destruction
- fix suspend resume
- forbid bo to have several different va in same vm
v15:
- rebase
v16:
- cleanup left over of vm init/fini
v17: agd5f:
- cs checker
v18: agd5f:
- reworks the CS ioctl to better support multiple rings and
VM. Rather than adding a new chunk id for VM, just re-use the
IB chunk id and add a new flags for VM mode. Also define additional
dwords for the flags chunk id to define the what ring we want to use
(gfx, compute, uvd, etc.) and the priority.
v19:
- fix cs fini in weird case of no ib
- semi working flush fix for ni
- rebase on top of sa allocator changes
v20: agd5f:
- further CS ioctl cleanups from Christian's comments
v21: agd5f:
- integrate CS checker improvements
v22: agd5f:
- final cleanups for release, only allow VM CS on cayman
Signed-off-by: Jerome Glisse <jglisse@redhat.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
2012-01-06 11:11:05 +08:00
|
|
|
int (*ib_parse)(struct radeon_device *rdev, struct radeon_ib *ib);
|
2011-10-20 01:02:21 +08:00
|
|
|
void (*emit_fence)(struct radeon_device *rdev, struct radeon_fence *fence);
|
2011-10-23 18:56:27 +08:00
|
|
|
void (*emit_semaphore)(struct radeon_device *rdev, struct radeon_ring *cp,
|
2011-10-20 01:02:21 +08:00
|
|
|
struct radeon_semaphore *semaphore, bool emit_wait);
|
2012-02-23 22:18:44 +08:00
|
|
|
int (*cs_parse)(struct radeon_cs_parser *p);
|
2012-02-24 06:53:45 +08:00
|
|
|
void (*ring_start)(struct radeon_device *rdev, struct radeon_ring *cp);
|
|
|
|
int (*ring_test)(struct radeon_device *rdev, struct radeon_ring *cp);
|
|
|
|
int (*ib_test)(struct radeon_device *rdev, struct radeon_ring *cp);
|
2012-05-02 21:11:09 +08:00
|
|
|
bool (*is_lockup)(struct radeon_device *rdev, struct radeon_ring *cp);
|
2012-10-03 02:43:38 +08:00
|
|
|
void (*vm_flush)(struct radeon_device *rdev, int ridx, struct radeon_vm *vm);
|
2011-10-20 01:02:21 +08:00
|
|
|
} ring[RADEON_NUM_RINGS];
|
2012-02-24 07:10:29 +08:00
|
|
|
/* irqs */
|
2012-02-24 06:53:43 +08:00
|
|
|
struct {
|
|
|
|
int (*set)(struct radeon_device *rdev);
|
|
|
|
int (*process)(struct radeon_device *rdev);
|
|
|
|
} irq;
|
2012-02-24 07:10:29 +08:00
|
|
|
/* displays */
|
2012-02-24 06:53:47 +08:00
|
|
|
struct {
|
|
|
|
/* display watermarks */
|
|
|
|
void (*bandwidth_update)(struct radeon_device *rdev);
|
|
|
|
/* get frame count */
|
|
|
|
u32 (*get_vblank_counter)(struct radeon_device *rdev, int crtc);
|
|
|
|
/* wait for vblank */
|
|
|
|
void (*wait_for_vblank)(struct radeon_device *rdev, int crtc);
|
2012-08-03 23:39:43 +08:00
|
|
|
/* set backlight level */
|
|
|
|
void (*set_backlight_level)(struct radeon_encoder *radeon_encoder, u8 level);
|
2012-09-14 21:59:26 +08:00
|
|
|
/* get backlight level */
|
|
|
|
u8 (*get_backlight_level)(struct radeon_encoder *radeon_encoder);
|
2012-02-24 06:53:47 +08:00
|
|
|
} display;
|
2012-02-24 07:10:29 +08:00
|
|
|
/* copy functions for bo handling */
|
2012-02-24 06:53:42 +08:00
|
|
|
struct {
|
|
|
|
int (*blit)(struct radeon_device *rdev,
|
|
|
|
uint64_t src_offset,
|
|
|
|
uint64_t dst_offset,
|
|
|
|
unsigned num_gpu_pages,
|
2012-05-08 20:24:01 +08:00
|
|
|
struct radeon_fence **fence);
|
2012-02-24 06:53:42 +08:00
|
|
|
u32 blit_ring_index;
|
|
|
|
int (*dma)(struct radeon_device *rdev,
|
|
|
|
uint64_t src_offset,
|
|
|
|
uint64_t dst_offset,
|
|
|
|
unsigned num_gpu_pages,
|
2012-05-08 20:24:01 +08:00
|
|
|
struct radeon_fence **fence);
|
2012-02-24 06:53:42 +08:00
|
|
|
u32 dma_ring_index;
|
|
|
|
/* method used for bo copy */
|
|
|
|
int (*copy)(struct radeon_device *rdev,
|
|
|
|
uint64_t src_offset,
|
|
|
|
uint64_t dst_offset,
|
|
|
|
unsigned num_gpu_pages,
|
2012-05-08 20:24:01 +08:00
|
|
|
struct radeon_fence **fence);
|
2012-02-24 06:53:42 +08:00
|
|
|
/* ring used for bo copies */
|
|
|
|
u32 copy_ring_index;
|
|
|
|
} copy;
|
2012-02-24 07:10:29 +08:00
|
|
|
/* surfaces */
|
2012-02-24 06:53:49 +08:00
|
|
|
struct {
|
|
|
|
int (*set_reg)(struct radeon_device *rdev, int reg,
|
|
|
|
uint32_t tiling_flags, uint32_t pitch,
|
|
|
|
uint32_t offset, uint32_t obj_size);
|
|
|
|
void (*clear_reg)(struct radeon_device *rdev, int reg);
|
|
|
|
} surface;
|
2012-02-24 07:10:29 +08:00
|
|
|
/* hotplug detect */
|
2012-02-24 06:53:39 +08:00
|
|
|
struct {
|
|
|
|
void (*init)(struct radeon_device *rdev);
|
|
|
|
void (*fini)(struct radeon_device *rdev);
|
|
|
|
bool (*sense)(struct radeon_device *rdev, enum radeon_hpd_id hpd);
|
|
|
|
void (*set_polarity)(struct radeon_device *rdev, enum radeon_hpd_id hpd);
|
|
|
|
} hpd;
|
2010-05-08 03:10:16 +08:00
|
|
|
/* power management */
|
2012-02-24 06:53:41 +08:00
|
|
|
struct {
|
|
|
|
void (*misc)(struct radeon_device *rdev);
|
|
|
|
void (*prepare)(struct radeon_device *rdev);
|
|
|
|
void (*finish)(struct radeon_device *rdev);
|
|
|
|
void (*init_profile)(struct radeon_device *rdev);
|
|
|
|
void (*get_dynpm_state)(struct radeon_device *rdev);
|
2012-02-24 06:53:48 +08:00
|
|
|
uint32_t (*get_engine_clock)(struct radeon_device *rdev);
|
|
|
|
void (*set_engine_clock)(struct radeon_device *rdev, uint32_t eng_clock);
|
|
|
|
uint32_t (*get_memory_clock)(struct radeon_device *rdev);
|
|
|
|
void (*set_memory_clock)(struct radeon_device *rdev, uint32_t mem_clock);
|
|
|
|
int (*get_pcie_lanes)(struct radeon_device *rdev);
|
|
|
|
void (*set_pcie_lanes)(struct radeon_device *rdev, int lanes);
|
|
|
|
void (*set_clock_gating)(struct radeon_device *rdev, int enable);
|
2012-02-24 06:53:41 +08:00
|
|
|
} pm;
|
2010-11-21 23:59:01 +08:00
|
|
|
/* pageflipping */
|
2012-02-24 06:53:40 +08:00
|
|
|
struct {
|
|
|
|
void (*pre_page_flip)(struct radeon_device *rdev, int crtc);
|
|
|
|
u32 (*page_flip)(struct radeon_device *rdev, int crtc, u64 crtc_base);
|
|
|
|
void (*post_page_flip)(struct radeon_device *rdev, int crtc);
|
|
|
|
} pflip;
|
2009-06-05 20:42:42 +08:00
|
|
|
};
|
|
|
|
|
2009-09-11 21:55:33 +08:00
|
|
|
/*
|
|
|
|
* Asic structures
|
|
|
|
*/
|
2009-09-01 13:25:57 +08:00
|
|
|
struct r100_asic {
|
2010-03-09 22:45:10 +08:00
|
|
|
const unsigned *reg_safe_bm;
|
|
|
|
unsigned reg_safe_bm_size;
|
|
|
|
u32 hdp_cntl;
|
2009-09-01 13:25:57 +08:00
|
|
|
};
|
|
|
|
|
2009-09-11 21:55:33 +08:00
|
|
|
struct r300_asic {
|
2010-03-09 22:45:10 +08:00
|
|
|
const unsigned *reg_safe_bm;
|
|
|
|
unsigned reg_safe_bm_size;
|
|
|
|
u32 resync_scratch;
|
|
|
|
u32 hdp_cntl;
|
2009-09-11 21:55:33 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
/*
 * Configuration values kept for r600-class ASICs; stored as the `r600`
 * member of union radeon_asic_config.  Every member is a plain unsigned
 * value; member order is part of the layout and must not change.
 */
struct r600_asic {
	unsigned		max_pipes;
	unsigned		max_tile_pipes;
	unsigned		max_simds;
	unsigned		max_backends;
	unsigned		max_gprs;
	unsigned		max_threads;
	unsigned		max_stack_entries;
	unsigned		max_hw_contexts;
	unsigned		max_gs_threads;
	unsigned		sx_max_export_size;
	unsigned		sx_max_export_pos_size;
	unsigned		sx_max_export_smx_size;
	unsigned		sq_num_cf_insts;
	unsigned		tiling_nbanks;
	unsigned		tiling_npipes;
	unsigned		tiling_group_size;
	unsigned		tile_config;
	unsigned		backend_map;
};
|
|
|
|
|
|
|
|
/*
 * Configuration values kept for rv770-class ASICs; stored as the `rv770`
 * member of union radeon_asic_config.  Every member is a plain unsigned
 * value; member order is part of the layout and must not change.
 */
struct rv770_asic {
	unsigned		max_pipes;
	unsigned		max_tile_pipes;
	unsigned		max_simds;
	unsigned		max_backends;
	unsigned		max_gprs;
	unsigned		max_threads;
	unsigned		max_stack_entries;
	unsigned		max_hw_contexts;
	unsigned		max_gs_threads;
	unsigned		sx_max_export_size;
	unsigned		sx_max_export_pos_size;
	unsigned		sx_max_export_smx_size;
	unsigned		sq_num_cf_insts;
	unsigned		sx_num_of_sets;
	unsigned		sc_prim_fifo_size;
	unsigned		sc_hiz_tile_fifo_size;
	/* "fize" (sic): the name is part of the struct's interface, so the
	 * spelling is kept; renaming would break existing users. */
	unsigned		sc_earlyz_tile_fifo_fize;
	unsigned		tiling_nbanks;
	unsigned		tiling_npipes;
	unsigned		tiling_group_size;
	unsigned		tile_config;
	unsigned		backend_map;
};
|
|
|
|
|
2010-03-25 01:33:47 +08:00
|
|
|
/*
 * Configuration values kept for evergreen-class ASICs; stored as the
 * `evergreen` member of union radeon_asic_config.  Every member is a plain
 * unsigned value; member order is part of the layout and must not change.
 */
struct evergreen_asic {
	unsigned		num_ses;
	unsigned		max_pipes;
	unsigned		max_tile_pipes;
	unsigned		max_simds;
	unsigned		max_backends;
	unsigned		max_gprs;
	unsigned		max_threads;
	unsigned		max_stack_entries;
	unsigned		max_hw_contexts;
	unsigned		max_gs_threads;
	unsigned		sx_max_export_size;
	unsigned		sx_max_export_pos_size;
	unsigned		sx_max_export_smx_size;
	unsigned		sq_num_cf_insts;
	unsigned		sx_num_of_sets;
	unsigned		sc_prim_fifo_size;
	unsigned		sc_hiz_tile_fifo_size;
	unsigned		sc_earlyz_tile_fifo_size;
	unsigned		tiling_nbanks;
	unsigned		tiling_npipes;
	unsigned		tiling_group_size;
	unsigned		tile_config;
	unsigned		backend_map;
};
|
|
|
|
|
2011-03-03 09:07:29 +08:00
|
|
|
/*
 * Configuration values kept for cayman-class ASICs; stored as the `cayman`
 * member of union radeon_asic_config.  The members fall into three groups,
 * separated by blank lines as in the original layout: max_* / fifo limits,
 * num_* / memory values, and the tile configuration word.  Member order is
 * part of the layout and must not change.
 */
struct cayman_asic {
	unsigned		max_shader_engines;
	unsigned		max_pipes_per_simd;
	unsigned		max_tile_pipes;
	unsigned		max_simds_per_se;
	unsigned		max_backends_per_se;
	unsigned		max_texture_channel_caches;
	unsigned		max_gprs;
	unsigned		max_threads;
	unsigned		max_gs_threads;
	unsigned		max_stack_entries;
	unsigned		sx_num_of_sets;
	unsigned		sx_max_export_size;
	unsigned		sx_max_export_pos_size;
	unsigned		sx_max_export_smx_size;
	unsigned		max_hw_contexts;
	unsigned		sq_num_cf_insts;
	unsigned		sc_prim_fifo_size;
	unsigned		sc_hiz_tile_fifo_size;
	unsigned		sc_earlyz_tile_fifo_size;

	unsigned		num_shader_engines;
	unsigned		num_shader_pipes_per_simd;
	unsigned		num_tile_pipes;
	unsigned		num_simds_per_se;
	unsigned		num_backends_per_se;
	unsigned		backend_disable_mask_per_asic;
	unsigned		backend_map;
	unsigned		num_texture_channel_caches;
	unsigned		mem_max_burst_length_bytes;
	unsigned		mem_row_size_in_kb;
	unsigned		shader_engine_tile_size;
	unsigned		num_gpus;
	unsigned		multi_gpu_tile_size;

	unsigned		tile_config;
};
|
|
|
|
|
2012-03-21 05:18:11 +08:00
|
|
|
/*
 * Configuration values kept for SI-class ASICs; stored as the `si` member
 * of union radeon_asic_config.  The members fall into three groups,
 * separated by blank lines as in the original layout: max_* / fifo limits,
 * num_* / memory values, and the tile configuration word.  Member order is
 * part of the layout and must not change.
 */
struct si_asic {
	unsigned		max_shader_engines;
	unsigned		max_tile_pipes;
	unsigned		max_cu_per_sh;
	unsigned		max_sh_per_se;
	unsigned		max_backends_per_se;
	unsigned		max_texture_channel_caches;
	unsigned		max_gprs;
	unsigned		max_gs_threads;
	unsigned		max_hw_contexts;
	unsigned		sc_prim_fifo_size_frontend;
	unsigned		sc_prim_fifo_size_backend;
	unsigned		sc_hiz_tile_fifo_size;
	unsigned		sc_earlyz_tile_fifo_size;

	unsigned		num_tile_pipes;
	unsigned		num_backends_per_se;
	unsigned		backend_disable_mask_per_asic;
	unsigned		backend_map;
	unsigned		num_texture_channel_caches;
	unsigned		mem_max_burst_length_bytes;
	unsigned		mem_row_size_in_kb;
	unsigned		shader_engine_tile_size;
	unsigned		num_gpus;
	unsigned		multi_gpu_tile_size;

	unsigned		tile_config;
};
|
|
|
|
|
2009-06-17 19:28:30 +08:00
|
|
|
union radeon_asic_config {
|
|
|
|
struct r300_asic r300;
|
2009-09-01 13:25:57 +08:00
|
|
|
struct r100_asic r100;
|
2009-09-08 08:10:24 +08:00
|
|
|
struct r600_asic r600;
|
|
|
|
struct rv770_asic rv770;
|
2010-03-25 01:33:47 +08:00
|
|
|
struct evergreen_asic evergreen;
|
2011-03-03 09:07:29 +08:00
|
|
|
struct cayman_asic cayman;
|
2012-03-21 05:18:11 +08:00
|
|
|
struct si_asic si;
|
2009-06-17 19:28:30 +08:00
|
|
|
};
|
|
|
|
|
2010-03-12 05:19:14 +08:00
|
|
|
/*
|
|
|
|
* asic initialization from radeon_asic.c
|
|
|
|
*/
|
|
|
|
void radeon_agp_disable(struct radeon_device *rdev);
int radeon_asic_init(struct radeon_device *rdev);
|
|
|
|
|
2009-06-05 20:42:42 +08:00
|
|
|
|
|
|
|
/*
|
|
|
|
* IOCTL.
|
|
|
|
*/
|
|
|
|
/*
 * GEM ioctl entry points.  All share the same signature shape: the DRM
 * device, an opaque ioctl argument pointer and the DRM file the call was
 * issued on, returning an int.
 */
int radeon_gem_info_ioctl(struct drm_device *dev, void *data,
			  struct drm_file *filp);
int radeon_gem_create_ioctl(struct drm_device *dev, void *data,
			    struct drm_file *filp);
int radeon_gem_pin_ioctl(struct drm_device *dev, void *data,
			 struct drm_file *file_priv);
int radeon_gem_unpin_ioctl(struct drm_device *dev, void *data,
			   struct drm_file *file_priv);
int radeon_gem_pwrite_ioctl(struct drm_device *dev, void *data,
			    struct drm_file *file_priv);
int radeon_gem_pread_ioctl(struct drm_device *dev, void *data,
			   struct drm_file *file_priv);
int radeon_gem_set_domain_ioctl(struct drm_device *dev, void *data,
				struct drm_file *filp);
int radeon_gem_mmap_ioctl(struct drm_device *dev, void *data,
			  struct drm_file *filp);
int radeon_gem_busy_ioctl(struct drm_device *dev, void *data,
			  struct drm_file *filp);
int radeon_gem_wait_idle_ioctl(struct drm_device *dev, void *data,
			       struct drm_file *filp);
|
drm/radeon: GPU virtual memory support v22
Virtual address space are per drm client (opener of /dev/drm).
Client are in charge of virtual address space, they need to
map bo into it by calling DRM_RADEON_GEM_VA ioctl.
First 16M of virtual address space is reserved by the kernel.
Once using 2 level page table we should be able to have a small
vram memory footprint for each pt (there would be one pt for all
gart, one for all vram and then one first level for each virtual
address space).
Plan include using the sub allocator for a common vm page table
area and using memcpy to copy vm page table in & out. Or use
a gart object and copy things in & out using dma.
v2: agd5f fixes:
- Add vram base offset for vram pages. The GPU physical address of a
vram page is FB_OFFSET + page offset. FB_OFFSET is 0 on discrete
cards and the physical bus address of the stolen memory on
integrated chips.
- VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR covers all vmid's >= 1
v3: agd5f:
- integrate with the semaphore/multi-ring stuff
v4:
- rebase on top ttm dma & multi-ring stuff
- userspace is now in charge of the address space
- no more specific cs vm ioctl, instead cs ioctl has a new
chunk
v5:
- properly handle mem == NULL case from move_notify callback
- fix the vm cleanup path
v6:
- fix update of page table to only happen on valid mem placement
v7:
- add tlb flush for each vm context
- add flags to define mapping property (readable, writeable, snooped)
- make ring id implicit from ib->fence->ring, up to each asic callback
to then do ring specific scheduling if vm ib scheduling function
v8:
- add query for ib limit and kernel reserved virtual space
- rename vm->size to max_pfn (maximum number of page)
- update gem_va ioctl to also allow unmap operation
- bump kernel version to allow userspace to query for vm support
v9:
- rebuild page table only when bind and incrementally depending
on bo referenced by cs and that have been moved
- allow virtual address space to grow
- use sa allocator for vram page table
- return invalid when querying vm limit on non cayman GPU
- dump vm fault register on lockup
v10: agd5f:
- Move the vm schedule_ib callback to a standalone function, remove
the callback and use the existing ib_execute callback for VM IBs.
v11:
- rebase on top of latest Linus
v12: agd5f:
- remove spurious backslash
- set IB vm_id to 0 in radeon_ib_get()
v13: agd5f:
- fix handling of RADEON_CHUNK_ID_FLAGS
v14:
- fix va destruction
- fix suspend resume
- forbid bo to have several different va in same vm
v15:
- rebase
v16:
- cleanup left over of vm init/fini
v17: agd5f:
- cs checker
v18: agd5f:
- reworks the CS ioctl to better support multiple rings and
VM. Rather than adding a new chunk id for VM, just re-use the
IB chunk id and add a new flags for VM mode. Also define additional
dwords for the flags chunk id to define the what ring we want to use
(gfx, compute, uvd, etc.) and the priority.
v19:
- fix cs fini in weird case of no ib
- semi working flush fix for ni
- rebase on top of sa allocator changes
v20: agd5f:
- further CS ioctl cleanups from Christian's comments
v21: agd5f:
- integrate CS checker improvements
v22: agd5f:
- final cleanups for release, only allow VM CS on cayman
Signed-off-by: Jerome Glisse <jglisse@redhat.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
2012-01-06 11:11:05 +08:00
|
|
|
/* Virtual-address, command-stream and tiling ioctl entry points. */
int radeon_gem_va_ioctl(struct drm_device *dev, void *data,
			struct drm_file *filp);
int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp);
int radeon_gem_set_tiling_ioctl(struct drm_device *dev, void *data,
				struct drm_file *filp);
int radeon_gem_get_tiling_ioctl(struct drm_device *dev, void *data,
				struct drm_file *filp);
|
2009-06-05 20:42:42 +08:00
|
|
|
|
2011-10-28 22:30:02 +08:00
|
|
|
/* VRAM scratch page for HDP bug, default vram page */
|
|
|
|
struct r600_vram_scratch {
|
2010-08-28 01:59:54 +08:00
|
|
|
struct radeon_bo *robj;
|
|
|
|
volatile uint32_t *ptr;
|
2011-10-28 22:30:02 +08:00
|
|
|
u64 gpu_addr;
|
2010-08-28 01:59:54 +08:00
|
|
|
};
|
2009-06-05 20:42:42 +08:00
|
|
|
|
2012-08-16 23:11:18 +08:00
|
|
|
/*
|
|
|
|
* ACPI
|
|
|
|
*/
|
|
|
|
/* ATIF notification configuration: an enable flag plus a command code. */
struct radeon_atif_notification_cfg {
	bool enabled;
	int command_code;
};
|
|
|
|
|
|
|
|
/*
 * One flag per ATIF notification type.
 * NOTE(review): presumably set from what the platform firmware reports as
 * supported - confirm against the ACPI code.
 */
struct radeon_atif_notifications {
	bool display_switch;
	bool expansion_mode_change;
	bool thermal_state;
	bool forced_power_state;
	bool system_power_state;
	bool display_conf_change;
	bool px_gfx_switch;
	bool brightness_change;
	bool dgpu_display_event;
};
|
|
|
|
|
|
|
|
/*
 * One flag per ATIF function.
 * NOTE(review): presumably set from what the platform firmware reports as
 * supported - confirm against the ACPI code.
 */
struct radeon_atif_functions {
	bool system_params;
	bool sbios_requests;
	bool select_active_disp;
	bool lid_state;
	bool get_tv_standard;
	bool set_tv_standard;
	bool get_panel_expansion_mode;
	bool set_panel_expansion_mode;
	bool temperature_change;
	bool graphics_device_types;
};
|
|
|
|
|
|
|
|
struct radeon_atif {
|
|
|
|
struct radeon_atif_notifications notifications;
|
|
|
|
struct radeon_atif_functions functions;
|
|
|
|
struct radeon_atif_notification_cfg notification_cfg;
|
2012-08-03 23:39:43 +08:00
|
|
|
struct radeon_encoder *encoder_for_bl;
|
2012-08-16 23:11:18 +08:00
|
|
|
};
|
2011-11-11 01:57:26 +08:00
|
|
|
|
2012-08-16 23:13:43 +08:00
|
|
|
/*
 * One flag per ATCS function.
 * NOTE(review): presumably set from what the platform firmware reports as
 * supported - confirm against the ACPI code.
 */
struct radeon_atcs_functions {
	bool get_ext_state;
	bool pcie_perf_req;
	bool pcie_dev_rdy;
	bool pcie_bus_width;
};
|
|
|
|
|
|
|
|
struct radeon_atcs {
|
|
|
|
struct radeon_atcs_functions functions;
|
|
|
|
};
|
|
|
|
|
2009-06-05 20:42:42 +08:00
|
|
|
/*
|
|
|
|
* Core structure, functions and helpers.
|
|
|
|
*/
|
|
|
|
/*
 * Indirect register accessor callbacks.  A read callback takes the device
 * and a 32-bit register argument and returns a 32-bit value; a write
 * callback takes the device and two 32-bit arguments (register, value as
 * used by the WREG32_* wrappers in this header).
 */
typedef uint32_t (*radeon_rreg_t)(struct radeon_device*, uint32_t);
typedef void (*radeon_wreg_t)(struct radeon_device*, uint32_t, uint32_t);
|
|
|
|
|
|
|
|
struct radeon_device {
|
2009-09-11 21:35:22 +08:00
|
|
|
struct device *dev;
|
2009-06-05 20:42:42 +08:00
|
|
|
struct drm_device *ddev;
|
|
|
|
struct pci_dev *pdev;
|
2012-07-03 00:45:19 +08:00
|
|
|
struct rw_semaphore exclusive_lock;
|
2009-06-05 20:42:42 +08:00
|
|
|
/* ASIC */
|
2009-06-17 19:28:30 +08:00
|
|
|
union radeon_asic_config config;
|
2009-06-05 20:42:42 +08:00
|
|
|
enum radeon_family family;
|
|
|
|
unsigned long flags;
|
|
|
|
int usec_timeout;
|
|
|
|
enum radeon_pll_errata pll_errata;
|
|
|
|
int num_gb_pipes;
|
2009-08-20 07:11:39 +08:00
|
|
|
int num_z_pipes;
|
2009-06-05 20:42:42 +08:00
|
|
|
int disp_priority;
|
|
|
|
/* BIOS */
|
|
|
|
uint8_t *bios;
|
|
|
|
bool is_atom_bios;
|
|
|
|
uint16_t bios_header_start;
|
2009-11-20 21:29:23 +08:00
|
|
|
struct radeon_bo *stollen_vga_memory;
|
2009-06-05 20:42:42 +08:00
|
|
|
/* Register mmio */
|
2009-06-29 16:29:12 +08:00
|
|
|
resource_size_t rmmio_base;
|
|
|
|
resource_size_t rmmio_size;
|
2012-12-02 21:06:15 +08:00
|
|
|
/* protects concurrent MM_INDEX/DATA based register access */
|
|
|
|
spinlock_t mmio_idx_lock;
|
2011-07-13 14:28:12 +08:00
|
|
|
void __iomem *rmmio;
|
2009-06-05 20:42:42 +08:00
|
|
|
radeon_rreg_t mc_rreg;
|
|
|
|
radeon_wreg_t mc_wreg;
|
|
|
|
radeon_rreg_t pll_rreg;
|
|
|
|
radeon_wreg_t pll_wreg;
|
2009-08-12 16:43:14 +08:00
|
|
|
uint32_t pcie_reg_mask;
|
2009-06-05 20:42:42 +08:00
|
|
|
radeon_rreg_t pciep_rreg;
|
|
|
|
radeon_wreg_t pciep_wreg;
|
2010-06-30 23:52:50 +08:00
|
|
|
/* io port */
|
|
|
|
void __iomem *rio_mem;
|
|
|
|
resource_size_t rio_mem_size;
|
2009-06-05 20:42:42 +08:00
|
|
|
struct radeon_clock clock;
|
|
|
|
struct radeon_mc mc;
|
|
|
|
struct radeon_gart gart;
|
|
|
|
struct radeon_mode_info mode_info;
|
|
|
|
struct radeon_scratch scratch;
|
|
|
|
struct radeon_mman mman;
|
2011-08-26 01:39:48 +08:00
|
|
|
struct radeon_fence_driver fence_drv[RADEON_NUM_RINGS];
|
2012-05-09 21:34:55 +08:00
|
|
|
wait_queue_head_t fence_queue;
|
2012-05-09 21:34:45 +08:00
|
|
|
struct mutex ring_lock;
|
2011-10-23 18:56:27 +08:00
|
|
|
struct radeon_ring ring[RADEON_NUM_RINGS];
|
2012-05-09 21:34:58 +08:00
|
|
|
bool ib_pool_ready;
|
|
|
|
struct radeon_sa_manager ring_tmp_bo;
|
2009-06-05 20:42:42 +08:00
|
|
|
struct radeon_irq irq;
|
|
|
|
struct radeon_asic *asic;
|
|
|
|
struct radeon_gem gem;
|
2009-07-14 03:04:08 +08:00
|
|
|
struct radeon_pm pm;
|
2009-09-15 10:21:01 +08:00
|
|
|
uint32_t bios_scratch[RADEON_BIOS_NUM_SCRATCH];
|
2009-06-05 20:42:42 +08:00
|
|
|
struct radeon_wb wb;
|
2009-09-08 08:10:24 +08:00
|
|
|
struct radeon_dummy_page dummy_page;
|
2009-06-05 20:42:42 +08:00
|
|
|
bool shutdown;
|
|
|
|
bool suspend;
|
2009-07-10 20:36:26 +08:00
|
|
|
bool need_dma32;
|
2009-09-16 21:24:21 +08:00
|
|
|
bool accel_working;
|
2009-06-24 07:48:08 +08:00
|
|
|
struct radeon_surface_reg surface_regs[RADEON_GEM_MAX_SURFACES];
|
2009-09-08 08:10:24 +08:00
|
|
|
const struct firmware *me_fw; /* all family ME firmware */
|
|
|
|
const struct firmware *pfp_fw; /* r6/700 PFP firmware */
|
2009-12-02 02:43:46 +08:00
|
|
|
const struct firmware *rlc_fw; /* r6/700 RLC firmware */
|
2011-01-07 10:19:31 +08:00
|
|
|
const struct firmware *mc_fw; /* NI MC firmware */
|
2012-03-21 05:18:17 +08:00
|
|
|
const struct firmware *ce_fw; /* SI CE firmware */
|
2009-09-08 08:10:24 +08:00
|
|
|
struct r600_blit r600_blit;
|
2011-10-28 22:30:02 +08:00
|
|
|
struct r600_vram_scratch vram_scratch;
|
2009-10-17 00:21:24 +08:00
|
|
|
int msi_enabled; /* msi enabled */
|
2009-12-02 02:43:46 +08:00
|
|
|
struct r600_ih ih; /* r6/700 interrupt ring */
|
2012-03-21 05:18:21 +08:00
|
|
|
struct si_rlc rlc;
|
2009-12-05 05:56:37 +08:00
|
|
|
struct work_struct hotplug_work;
|
2012-03-30 20:59:57 +08:00
|
|
|
struct work_struct audio_work;
|
2010-02-02 05:02:25 +08:00
|
|
|
int num_crtc; /* number of crtcs */
|
2009-12-23 16:23:21 +08:00
|
|
|
struct mutex dc_hw_i2c_mutex; /* display controller hw i2c mutex */
|
2012-05-15 03:25:57 +08:00
|
|
|
bool audio_enabled;
|
|
|
|
struct r600_audio audio_status; /* audio stuff */
|
2010-05-08 03:10:16 +08:00
|
|
|
struct notifier_block acpi_nb;
|
2011-01-05 12:46:48 +08:00
|
|
|
/* only one userspace can use Hyperz features or CMASK at a time */
|
2010-07-13 09:11:11 +08:00
|
|
|
struct drm_file *hyperz_filp;
|
2011-01-05 12:46:48 +08:00
|
|
|
struct drm_file *cmask_filp;
|
2010-08-06 09:21:16 +08:00
|
|
|
/* i2c buses */
|
|
|
|
struct radeon_i2c_chan *i2c_bus[RADEON_MAX_I2C_BUS];
|
2011-10-24 20:54:54 +08:00
|
|
|
/* debugfs */
|
|
|
|
struct radeon_debugfs debugfs[RADEON_DEBUGFS_MAX_COMPONENTS];
|
|
|
|
unsigned debugfs_count;
|
drm/radeon: GPU virtual memory support v22
Virtual address space are per drm client (opener of /dev/drm).
Client are in charge of virtual address space, they need to
map bo into it by calling DRM_RADEON_GEM_VA ioctl.
First 16M of virtual address space is reserved by the kernel.
Once using 2 level page table we should be able to have a small
vram memory footprint for each pt (there would be one pt for all
gart, one for all vram and then one first level for each virtual
address space).
Plan include using the sub allocator for a common vm page table
area and using memcpy to copy vm page table in & out. Or use
a gart object and copy things in & out using dma.
v2: agd5f fixes:
- Add vram base offset for vram pages. The GPU physical address of a
vram page is FB_OFFSET + page offset. FB_OFFSET is 0 on discrete
cards and the physical bus address of the stolen memory on
integrated chips.
- VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR covers all vmid's >= 1
v3: agd5f:
- integrate with the semaphore/multi-ring stuff
v4:
- rebase on top ttm dma & multi-ring stuff
- userspace is now in charge of the address space
- no more specific cs vm ioctl, instead cs ioctl has a new
chunk
v5:
- properly handle mem == NULL case from move_notify callback
- fix the vm cleanup path
v6:
- fix update of page table to only happen on valid mem placement
v7:
- add tlb flush for each vm context
- add flags to define mapping property (readable, writeable, snooped)
- make ring id implicit from ib->fence->ring, up to each asic callback
to then do ring specific scheduling if vm ib scheduling function
v8:
- add query for ib limit and kernel reserved virtual space
- rename vm->size to max_pfn (maximum number of page)
- update gem_va ioctl to also allow unmap operation
- bump kernel version to allow userspace to query for vm support
v9:
- rebuild page table only when bind and incrementaly depending
on bo referenced by cs and that have been moved
- allow virtual address space to grow
- use sa allocator for vram page table
- return invalid when querying vm limit on non cayman GPU
- dump vm fault register on lockup
v10: agd5f:
- Move the vm schedule_ib callback to a standalone function, remove
the callback and use the existing ib_execute callback for VM IBs.
v11:
- rebase on top of lastest Linus
v12: agd5f:
- remove spurious backslash
- set IB vm_id to 0 in radeon_ib_get()
v13: agd5f:
- fix handling of RADEON_CHUNK_ID_FLAGS
v14:
- fix va destruction
- fix suspend resume
- forbid bo to have several different va in same vm
v15:
- rebase
v16:
- cleanup left over of vm init/fini
v17: agd5f:
- cs checker
v18: agd5f:
- reworks the CS ioctl to better support multiple rings and
VM. Rather than adding a new chunk id for VM, just re-use the
IB chunk id and add a new flags for VM mode. Also define additional
dwords for the flags chunk id to define the what ring we want to use
(gfx, compute, uvd, etc.) and the priority.
v19:
- fix cs fini in weird case of no ib
- semi working flush fix for ni
- rebase on top of sa allocator changes
v20: agd5f:
- further CS ioctl cleanups from Christian's comments
v21: agd5f:
- integrate CS checker improvements
v22: agd5f:
- final cleanups for release, only allow VM CS on cayman
Signed-off-by: Jerome Glisse <jglisse@redhat.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
2012-01-06 11:11:05 +08:00
|
|
|
/* virtual memory */
|
|
|
|
struct radeon_vm_manager vm_manager;
|
2012-08-09 22:34:17 +08:00
|
|
|
struct mutex gpu_clock_mutex;
|
2012-08-16 23:11:18 +08:00
|
|
|
/* ACPI interface */
|
|
|
|
struct radeon_atif atif;
|
2012-08-16 23:13:43 +08:00
|
|
|
struct radeon_atcs atcs;
|
2009-06-05 20:42:42 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
int radeon_device_init(struct radeon_device *rdev,
		       struct drm_device *ddev,
		       struct pci_dev *pdev,
		       uint32_t flags);
void radeon_device_fini(struct radeon_device *rdev);
int radeon_gpu_wait_for_idle(struct radeon_device *rdev);
|
|
|
|
|
2012-12-02 21:02:51 +08:00
|
|
|
uint32_t r100_mm_rreg(struct radeon_device *rdev, uint32_t reg,
|
|
|
|
bool always_indirect);
|
|
|
|
void r100_mm_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v,
|
|
|
|
bool always_indirect);
|
2011-10-14 07:08:42 +08:00
|
|
|
u32 r100_io_rreg(struct radeon_device *rdev, u32 reg);
|
|
|
|
void r100_io_wreg(struct radeon_device *rdev, u32 reg, u32 v);
|
2010-06-30 23:52:50 +08:00
|
|
|
|
2009-11-20 21:29:23 +08:00
|
|
|
/*
|
|
|
|
* Cast helper
|
|
|
|
*/
|
|
|
|
#define to_radeon_fence(p) ((struct radeon_fence *)(p))
|
2009-06-05 20:42:42 +08:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Registers read & write functions.
|
|
|
|
*/
|
2011-07-13 14:28:12 +08:00
|
|
|
#define RREG8(reg) readb((rdev->rmmio) + (reg))
|
|
|
|
#define WREG8(reg, v) writeb(v, (rdev->rmmio) + (reg))
|
|
|
|
#define RREG16(reg) readw((rdev->rmmio) + (reg))
|
|
|
|
#define WREG16(reg, v) writew(v, (rdev->rmmio) + (reg))
|
2012-12-02 21:02:51 +08:00
|
|
|
#define RREG32(reg) r100_mm_rreg(rdev, (reg), false)
|
|
|
|
#define RREG32_IDX(reg) r100_mm_rreg(rdev, (reg), true)
|
|
|
|
#define DREG32(reg) printk(KERN_INFO "REGISTER: " #reg " : 0x%08X\n", r100_mm_rreg(rdev, (reg), false))
|
|
|
|
#define WREG32(reg, v) r100_mm_wreg(rdev, (reg), (v), false)
|
|
|
|
#define WREG32_IDX(reg, v) r100_mm_wreg(rdev, (reg), (v), true)
|
2009-06-05 20:42:42 +08:00
|
|
|
#define REG_SET(FIELD, v) (((v) << FIELD##_SHIFT) & FIELD##_MASK)
|
|
|
|
/* Extract FIELD from register value v: mask the field's bits, then shift them down to bit 0 (inverse of REG_SET). */
#define REG_GET(FIELD, v) (((v) & FIELD##_MASK) >> FIELD##_SHIFT)
|
|
|
|
#define RREG32_PLL(reg) rdev->pll_rreg(rdev, (reg))
|
|
|
|
#define WREG32_PLL(reg, v) rdev->pll_wreg(rdev, (reg), (v))
|
|
|
|
#define RREG32_MC(reg) rdev->mc_rreg(rdev, (reg))
|
|
|
|
#define WREG32_MC(reg, v) rdev->mc_wreg(rdev, (reg), (v))
|
2009-08-12 16:43:14 +08:00
|
|
|
#define RREG32_PCIE(reg) rv370_pcie_rreg(rdev, (reg))
|
|
|
|
#define WREG32_PCIE(reg, v) rv370_pcie_wreg(rdev, (reg), (v))
|
2010-02-19 04:24:28 +08:00
|
|
|
#define RREG32_PCIE_P(reg) rdev->pciep_rreg(rdev, (reg))
|
|
|
|
#define WREG32_PCIE_P(reg, v) rdev->pciep_wreg(rdev, (reg), (v))
|
2009-06-05 20:42:42 +08:00
|
|
|
/*
 * Read-modify-write of an MMIO register.  Bits that are SET in `mask` are
 * preserved from the current register contents; bits that are CLEAR in
 * `mask` are replaced by the corresponding bits of `val`.  `reg` is
 * evaluated twice (once for the read, once for the write).
 */
#define WREG32_P(reg, val, mask)				\
	do {							\
		uint32_t tmp_ = RREG32(reg);			\
		tmp_ &= (mask);					\
		tmp_ |= ((val) & ~(mask));			\
		WREG32(reg, tmp_);				\
	} while (0)
|
|
|
|
/*
 * Read-modify-write of a PLL register through RREG32_PLL/WREG32_PLL.  Bits
 * that are SET in `mask` are preserved from the current register contents;
 * bits that are CLEAR in `mask` are replaced by the corresponding bits of
 * `val`.  `reg` is evaluated twice (once for the read, once for the write).
 */
#define WREG32_PLL_P(reg, val, mask)				\
	do {							\
		uint32_t tmp_ = RREG32_PLL(reg);		\
		tmp_ &= (mask);					\
		tmp_ |= ((val) & ~(mask));			\
		WREG32_PLL(reg, tmp_);				\
	} while (0)
|
2012-12-02 21:02:51 +08:00
|
|
|
#define DREG32_SYS(sqf, rdev, reg) seq_printf((sqf), #reg " : 0x%08X\n", r100_mm_rreg((rdev), (reg), false))
|
2010-06-30 23:52:50 +08:00
|
|
|
#define RREG32_IO(reg) r100_io_rreg(rdev, (reg))
|
|
|
|
#define WREG32_IO(reg, v) r100_io_wreg(rdev, (reg), (v))
|
2009-06-05 20:42:42 +08:00
|
|
|
|
2009-08-12 16:43:14 +08:00
|
|
|
/*
|
|
|
|
* Indirect registers accessor
|
|
|
|
*/
|
|
|
|
static inline uint32_t rv370_pcie_rreg(struct radeon_device *rdev, uint32_t reg)
|
|
|
|
{
|
|
|
|
uint32_t r;
|
|
|
|
|
|
|
|
WREG32(RADEON_PCIE_INDEX, ((reg) & rdev->pcie_reg_mask));
|
|
|
|
r = RREG32(RADEON_PCIE_DATA);
|
|
|
|
return r;
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline void rv370_pcie_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v)
|
|
|
|
{
|
|
|
|
WREG32(RADEON_PCIE_INDEX, ((reg) & rdev->pcie_reg_mask));
|
|
|
|
WREG32(RADEON_PCIE_DATA, (v));
|
|
|
|
}
|
|
|
|
|
2009-06-05 20:42:42 +08:00
|
|
|
void r100_pll_errata_after_index(struct radeon_device *rdev);
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
* ASICs helpers.
|
|
|
|
*/
|
2009-07-14 00:02:32 +08:00
|
|
|
/* True when the PCI device id is 0x515e or 0x5969.  Evaluates rdev twice. */
#define ASIC_IS_RN50(rdev) (((rdev)->pdev->device == 0x515e) ||  \
			    ((rdev)->pdev->device == 0x5969))
|
2009-06-05 20:42:42 +08:00
|
|
|
/*
 * True when the family is one of RV100, RV200, RS100, RS200, RV250, RV280
 * or RS300.  Evaluates rdev more than once.
 */
#define ASIC_IS_RV100(rdev) (((rdev)->family == CHIP_RV100) ||  \
			     ((rdev)->family == CHIP_RV200) ||  \
			     ((rdev)->family == CHIP_RS100) ||  \
			     ((rdev)->family == CHIP_RS200) ||  \
			     ((rdev)->family == CHIP_RV250) ||  \
			     ((rdev)->family == CHIP_RV280) ||  \
			     ((rdev)->family == CHIP_RS300))
|
|
|
|
/*
 * True when the family is one of R300, RV350, R350, RV380, R420, R423,
 * RV410, RS400 or RS480.  Evaluates rdev more than once.
 */
#define ASIC_IS_R300(rdev) (((rdev)->family == CHIP_R300)  ||	\
			    ((rdev)->family == CHIP_RV350) ||	\
			    ((rdev)->family == CHIP_R350)  ||	\
			    ((rdev)->family == CHIP_RV380) ||	\
			    ((rdev)->family == CHIP_R420)  ||	\
			    ((rdev)->family == CHIP_R423)  ||	\
			    ((rdev)->family == CHIP_RV410) ||	\
			    ((rdev)->family == CHIP_RS400) ||	\
			    ((rdev)->family == CHIP_RS480))
|
2011-01-07 07:49:34 +08:00
|
|
|
/*
 * True when the PCI device id (read through rdev->ddev->pdev) is one of
 * the ids listed below.  Evaluates rdev more than once.
 */
#define ASIC_IS_X2(rdev) (((rdev)->ddev->pdev->device == 0x9441) || \
			  ((rdev)->ddev->pdev->device == 0x9443) || \
			  ((rdev)->ddev->pdev->device == 0x944B) || \
			  ((rdev)->ddev->pdev->device == 0x9506) || \
			  ((rdev)->ddev->pdev->device == 0x9509) || \
			  ((rdev)->ddev->pdev->device == 0x950F) || \
			  ((rdev)->ddev->pdev->device == 0x689C) || \
			  ((rdev)->ddev->pdev->device == 0x689D))
|
2009-06-05 20:42:42 +08:00
|
|
|
/* True when the family is CHIP_RS600 or any later enum radeon_family value. */
#define ASIC_IS_AVIVO(rdev) (((rdev)->family >= CHIP_RS600))
|
2010-11-17 01:09:41 +08:00
|
|
|
/*
 * True when the family is RS600, RS690 or RS740, or is CHIP_R600 or any
 * later enum radeon_family value.  Evaluates rdev more than once.
 */
#define ASIC_IS_DCE2(rdev) (((rdev)->family == CHIP_RS600) ||  \
			    ((rdev)->family == CHIP_RS690) ||  \
			    ((rdev)->family == CHIP_RS740) ||  \
			    ((rdev)->family >= CHIP_R600))
|
2009-06-05 20:42:42 +08:00
|
|
|
/* True when the family is CHIP_RV620 or any later enum radeon_family value. */
#define ASIC_IS_DCE3(rdev) (((rdev)->family >= CHIP_RV620))
|
|
|
|
/* True when the family is CHIP_RV730 or any later enum radeon_family value. */
#define ASIC_IS_DCE32(rdev) (((rdev)->family >= CHIP_RV730))
|
2010-01-13 06:54:34 +08:00
|
|
|
/* True when the family is CHIP_CEDAR or any later enum radeon_family value. */
#define ASIC_IS_DCE4(rdev) (((rdev)->family >= CHIP_CEDAR))
|
2011-01-07 10:19:11 +08:00
|
|
|
/*
 * True when the family is CHIP_PALM or later AND the RADEON_IS_IGP flag is
 * set in rdev->flags.  Evaluates rdev twice.
 */
#define ASIC_IS_DCE41(rdev) (((rdev)->family >= CHIP_PALM) && \
			     ((rdev)->flags & RADEON_IS_IGP))
|
2011-01-07 10:19:12 +08:00
|
|
|
/* True when the family is CHIP_BARTS or any later enum radeon_family value. */
#define ASIC_IS_DCE5(rdev) (((rdev)->family >= CHIP_BARTS))
|
2012-03-21 05:18:28 +08:00
|
|
|
/* True when the family is CHIP_ARUBA or any later enum radeon_family value. */
#define ASIC_IS_DCE6(rdev) (((rdev)->family >= CHIP_ARUBA))
|
|
|
|
/*
 * True when the family is CHIP_ARUBA or later AND the RADEON_IS_IGP flag
 * is set in rdev->flags.  Evaluates rdev twice.
 */
#define ASIC_IS_DCE61(rdev) (((rdev)->family >= CHIP_ARUBA) && \
			     ((rdev)->flags & RADEON_IS_IGP))
|
2009-06-05 20:42:42 +08:00
|
|
|
|
|
|
|
/*
|
|
|
|
* BIOS helpers.
|
|
|
|
*/
|
|
|
|
/*
 * Little-endian reads from the BIOS image at byte offset i.  All three
 * macros use a variable named `rdev` from the calling scope.
 *
 * RBIOS32 converts the upper half to uint32_t before shifting: shifting
 * the int-typed RBIOS16 result left by 16 overflows a signed int whenever
 * bit 15 of that half is set.
 */
#define RBIOS8(i) (rdev->bios[i])
#define RBIOS16(i) (RBIOS8(i) | (RBIOS8((i)+1) << 8))
#define RBIOS32(i) ((RBIOS16(i)) | ((uint32_t)RBIOS16((i)+2) << 16))
|
|
|
|
|
|
|
|
int radeon_combios_init(struct radeon_device *rdev);
void radeon_combios_fini(struct radeon_device *rdev);
int radeon_atombios_init(struct radeon_device *rdev);
void radeon_atombios_fini(struct radeon_device *rdev);
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
* RING helpers.
|
|
|
|
*/
|
2011-10-14 07:08:47 +08:00
|
|
|
#if DRM_DEBUG_CODE == 0
|
2011-10-23 18:56:27 +08:00
|
|
|
static inline void radeon_ring_write(struct radeon_ring *ring, uint32_t v)
|
2009-06-05 20:42:42 +08:00
|
|
|
{
|
2011-10-23 18:56:27 +08:00
|
|
|
ring->ring[ring->wptr++] = v;
|
|
|
|
ring->wptr &= ring->ptr_mask;
|
|
|
|
ring->count_dw--;
|
|
|
|
ring->ring_free_dw--;
|
2009-06-05 20:42:42 +08:00
|
|
|
}
|
2011-10-14 07:08:47 +08:00
|
|
|
#else
|
|
|
|
/* With debugging this is just too big to inline */
|
2011-10-23 18:56:27 +08:00
|
|
|
void radeon_ring_write(struct radeon_ring *ring, uint32_t v);
|
2011-10-14 07:08:47 +08:00
|
|
|
#endif
|
2009-06-05 20:42:42 +08:00
|
|
|
|
|
|
|
/*
|
|
|
|
* ASICs macro.
|
|
|
|
*/
|
2009-06-17 19:28:30 +08:00
|
|
|
/* Call the init hook from rdev's asic table, passing rdev. */
#define radeon_init(rdev)	((rdev)->asic->init((rdev)))
|
2009-09-08 08:10:24 +08:00
|
|
|
/* Call the fini / resume / suspend hook from rdev's asic table, passing rdev. */
#define radeon_fini(rdev)	((rdev)->asic->fini((rdev)))
#define radeon_resume(rdev)	((rdev)->asic->resume((rdev)))
#define radeon_suspend(rdev)	((rdev)->asic->suspend((rdev)))
|
2012-02-23 22:18:44 +08:00
|
|
|
/* Call the cs_parse hook of ring entry r in rdev's asic table; only p is passed on. */
#define radeon_cs_parse(rdev, r, p) \
	((rdev)->asic->ring[(r)].cs_parse((p)))
|
2009-09-21 12:33:58 +08:00
|
|
|
/* Call the vga_set_state hook from rdev's asic table with rdev and state. */
#define radeon_vga_set_state(rdev, state) \
	((rdev)->asic->vga_set_state((rdev), (state)))
|
2010-03-09 22:45:11 +08:00
|
|
|
/* Call the asic_reset hook from rdev's asic table, passing rdev. */
#define radeon_asic_reset(rdev)	((rdev)->asic->asic_reset((rdev)))
|
2012-02-24 06:53:46 +08:00
|
|
|
/* Call the gart.tlb_flush / gart.set_page hooks from rdev's asic table. */
#define radeon_gart_tlb_flush(rdev) \
	((rdev)->asic->gart.tlb_flush((rdev)))
#define radeon_gart_set_page(rdev, i, p) \
	((rdev)->asic->gart.set_page((rdev), (i), (p)))
|
2012-08-07 02:21:10 +08:00
|
|
|
/* Call the vm.init / vm.fini hooks from rdev's asic table, passing rdev. */
#define radeon_asic_vm_init(rdev)	((rdev)->asic->vm.init((rdev)))
#define radeon_asic_vm_fini(rdev)	((rdev)->asic->vm.fini((rdev)))
|
2012-09-18 01:36:18 +08:00
|
|
|
/* Call the vm.set_page hook from rdev's asic table, forwarding every argument. */
#define radeon_asic_vm_set_page(rdev, pe, addr, count, incr, flags) \
	((rdev)->asic->vm.set_page((rdev), (pe), (addr), \
				   (count), (incr), (flags)))
|
2012-02-24 06:53:45 +08:00
|
|
|
/*
 * Call the ring_start / ring_test / ib_test hook of ring entry r in rdev's
 * asic table, passing rdev and cp.
 */
#define radeon_ring_start(rdev, r, cp) \
	((rdev)->asic->ring[(r)].ring_start((rdev), (cp)))
#define radeon_ring_test(rdev, r, cp) \
	((rdev)->asic->ring[(r)].ring_test((rdev), (cp)))
#define radeon_ib_test(rdev, r, cp) \
	((rdev)->asic->ring[(r)].ib_test((rdev), (cp)))
|
2011-10-20 01:02:21 +08:00
|
|
|
/* Call the ib_execute hook of ring entry r in rdev's asic table with rdev and ib. */
#define radeon_ring_ib_execute(rdev, r, ib) \
	((rdev)->asic->ring[(r)].ib_execute((rdev), (ib)))
|
drm/radeon: GPU virtual memory support v22
Virtual address spaces are per drm client (opener of /dev/drm).
Clients are in charge of their virtual address space; they need to
map bos into it by calling the DRM_RADEON_GEM_VA ioctl.
First 16M of virtual address space is reserved by the kernel.
Once using 2 level page table we should be able to have a small
vram memory footprint for each pt (there would be one pt for all
gart, one for all vram and then one first level for each virtual
address space).
Plan include using the sub allocator for a common vm page table
area and using memcpy to copy vm page table in & out. Or use
a gart object and copy things in & out using dma.
v2: agd5f fixes:
- Add vram base offset for vram pages. The GPU physical address of a
vram page is FB_OFFSET + page offset. FB_OFFSET is 0 on discrete
cards and the physical bus address of the stolen memory on
integrated chips.
- VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR covers all vmid's >= 1
v3: agd5f:
- integrate with the semaphore/multi-ring stuff
v4:
- rebase on top ttm dma & multi-ring stuff
- userspace is now in charge of the address space
- no more specific cs vm ioctl, instead cs ioctl has a new
chunk
v5:
- properly handle mem == NULL case from move_notify callback
- fix the vm cleanup path
v6:
- fix update of page table to only happen on valid mem placement
v7:
- add tlb flush for each vm context
- add flags to define mapping property (readable, writeable, snooped)
- make ring id implicit from ib->fence->ring, up to each asic callback
to then do ring specific scheduling if vm ib scheduling function
v8:
- add query for ib limit and kernel reserved virtual space
- rename vm->size to max_pfn (maximum number of page)
- update gem_va ioctl to also allow unmap operation
- bump kernel version to allow userspace to query for vm support
v9:
- rebuild page table only on bind, and incrementally depending
on bos referenced by cs that have been moved
- allow virtual address space to grow
- use sa allocator for vram page table
- return invalid when querying vm limit on non cayman GPU
- dump vm fault register on lockup
v10: agd5f:
- Move the vm schedule_ib callback to a standalone function, remove
the callback and use the existing ib_execute callback for VM IBs.
v11:
- rebase on top of latest Linus
v12: agd5f:
- remove spurious backslash
- set IB vm_id to 0 in radeon_ib_get()
v13: agd5f:
- fix handling of RADEON_CHUNK_ID_FLAGS
v14:
- fix va destruction
- fix suspend resume
- forbid bo to have several different va in same vm
v15:
- rebase
v16:
- cleanup left over of vm init/fini
v17: agd5f:
- cs checker
v18: agd5f:
- reworks the CS ioctl to better support multiple rings and
VM. Rather than adding a new chunk id for VM, just re-use the
IB chunk id and add a new flags for VM mode. Also define additional
dwords for the flags chunk id to define the what ring we want to use
(gfx, compute, uvd, etc.) and the priority.
v19:
- fix cs fini in weird case of no ib
- semi working flush fix for ni
- rebase on top of sa allocator changes
v20: agd5f:
- further CS ioctl cleanups from Christian's comments
v21: agd5f:
- integrate CS checker improvements
v22: agd5f:
- final cleanups for release, only allow VM CS on cayman
Signed-off-by: Jerome Glisse <jglisse@redhat.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
2012-01-06 11:11:05 +08:00
|
|
|
/* Call the ib_parse hook of ring entry r in rdev's asic table with rdev and ib. */
#define radeon_ring_ib_parse(rdev, r, ib) \
	((rdev)->asic->ring[(r)].ib_parse((rdev), (ib)))
|
2012-05-02 21:11:09 +08:00
|
|
|
/* Call the is_lockup hook of ring entry r in rdev's asic table with rdev and cp. */
#define radeon_ring_is_lockup(rdev, r, cp) \
	((rdev)->asic->ring[(r)].is_lockup((rdev), (cp)))
|
2012-10-03 02:43:38 +08:00
|
|
|
/*
 * Call the vm_flush hook of ring entry r in rdev's asic table; r is used both
 * to pick the entry and as the hook's second argument.
 */
#define radeon_ring_vm_flush(rdev, r, vm) \
	((rdev)->asic->ring[(r)].vm_flush((rdev), (r), (vm)))
|
2012-02-24 06:53:43 +08:00
|
|
|
/* Call the irq.set / irq.process hooks from rdev's asic table, passing rdev. */
#define radeon_irq_set(rdev)		((rdev)->asic->irq.set((rdev)))
#define radeon_irq_process(rdev)	((rdev)->asic->irq.process((rdev)))
|
2012-02-24 06:53:47 +08:00
|
|
|
/* Call the display.get_vblank_counter hook from rdev's asic table with rdev and crtc. */
#define radeon_get_vblank_counter(rdev, crtc) \
	((rdev)->asic->display.get_vblank_counter((rdev), (crtc)))
|
2012-08-03 23:39:43 +08:00
|
|
|
/*
 * Call the display.set_backlight_level hook from rdev's asic table.
 * rdev only selects the table; the hook itself receives just e and l.
 */
#define radeon_set_backlight_level(rdev, e, l) \
	((rdev)->asic->display.set_backlight_level((e), (l)))
|
2012-09-14 21:59:26 +08:00
|
|
|
/*
 * Call the display.get_backlight_level hook from rdev's asic table.
 * rdev only selects the table; the hook itself receives just e.
 */
#define radeon_get_backlight_level(rdev, e) \
	((rdev)->asic->display.get_backlight_level((e)))
|
2011-10-20 01:02:21 +08:00
|
|
|
/* Call the emit_fence / emit_semaphore hook of ring entry r in rdev's asic table. */
#define radeon_fence_ring_emit(rdev, r, fence) \
	((rdev)->asic->ring[(r)].emit_fence((rdev), (fence)))
#define radeon_semaphore_ring_emit(rdev, r, cp, semaphore, emit_wait) \
	((rdev)->asic->ring[(r)].emit_semaphore((rdev), (cp), \
						(semaphore), (emit_wait)))
|
2012-02-24 06:53:42 +08:00
|
|
|
/* Call the copy.blit / copy.dma / copy.copy hooks from rdev's asic table. */
#define radeon_copy_blit(rdev, s, d, np, f) \
	((rdev)->asic->copy.blit((rdev), (s), (d), (np), (f)))
#define radeon_copy_dma(rdev, s, d, np, f) \
	((rdev)->asic->copy.dma((rdev), (s), (d), (np), (f)))
#define radeon_copy(rdev, s, d, np, f) \
	((rdev)->asic->copy.copy((rdev), (s), (d), (np), (f)))

/* Read the ring index stored next to each copy hook in rdev's asic table. */
#define radeon_copy_blit_ring_index(rdev) \
	((rdev)->asic->copy.blit_ring_index)
#define radeon_copy_dma_ring_index(rdev) \
	((rdev)->asic->copy.dma_ring_index)
#define radeon_copy_ring_index(rdev) \
	((rdev)->asic->copy.copy_ring_index)
|
2012-02-24 06:53:48 +08:00
|
|
|
/*
 * Call the clock, PCIe-lane and clock-gating hooks in the pm part of rdev's
 * asic table. Each passes rdev first, followed by the new value for setters.
 */
#define radeon_get_engine_clock(rdev) \
	((rdev)->asic->pm.get_engine_clock((rdev)))
#define radeon_set_engine_clock(rdev, e) \
	((rdev)->asic->pm.set_engine_clock((rdev), (e)))
#define radeon_get_memory_clock(rdev) \
	((rdev)->asic->pm.get_memory_clock((rdev)))
#define radeon_set_memory_clock(rdev, e) \
	((rdev)->asic->pm.set_memory_clock((rdev), (e)))
#define radeon_get_pcie_lanes(rdev) \
	((rdev)->asic->pm.get_pcie_lanes((rdev)))
#define radeon_set_pcie_lanes(rdev, l) \
	((rdev)->asic->pm.set_pcie_lanes((rdev), (l)))
#define radeon_set_clock_gating(rdev, e) \
	((rdev)->asic->pm.set_clock_gating((rdev), (e)))
|
2012-02-24 06:53:49 +08:00
|
|
|
/* Call the surface.set_reg / surface.clear_reg hooks from rdev's asic table. */
#define radeon_set_surface_reg(rdev, r, f, p, o, s) \
	((rdev)->asic->surface.set_reg((rdev), (r), (f), (p), (o), (s)))
#define radeon_clear_surface_reg(rdev, r) \
	((rdev)->asic->surface.clear_reg((rdev), (r)))
|
2012-02-24 06:53:47 +08:00
|
|
|
/* Call the display.bandwidth_update hook from rdev's asic table, passing rdev. */
#define radeon_bandwidth_update(rdev) \
	((rdev)->asic->display.bandwidth_update((rdev)))
|
2012-02-24 06:53:39 +08:00
|
|
|
/* Call the hooks in the hpd part of rdev's asic table; h is forwarded unchanged. */
#define radeon_hpd_init(rdev)		((rdev)->asic->hpd.init((rdev)))
#define radeon_hpd_fini(rdev)		((rdev)->asic->hpd.fini((rdev)))
#define radeon_hpd_sense(rdev, h) \
	((rdev)->asic->hpd.sense((rdev), (h)))
#define radeon_hpd_set_polarity(rdev, h) \
	((rdev)->asic->hpd.set_polarity((rdev), (h)))
|
2010-04-23 00:39:58 +08:00
|
|
|
/* Call the gui_idle hook from rdev's asic table, passing rdev. */
#define radeon_gui_idle(rdev)	((rdev)->asic->gui_idle((rdev)))
|
2012-02-24 06:53:41 +08:00
|
|
|
/* Call the remaining hooks in the pm part of rdev's asic table, passing rdev. */
#define radeon_pm_misc(rdev) \
	((rdev)->asic->pm.misc((rdev)))
#define radeon_pm_prepare(rdev) \
	((rdev)->asic->pm.prepare((rdev)))
#define radeon_pm_finish(rdev) \
	((rdev)->asic->pm.finish((rdev)))
#define radeon_pm_init_profile(rdev) \
	((rdev)->asic->pm.init_profile((rdev)))
#define radeon_pm_get_dynpm_state(rdev) \
	((rdev)->asic->pm.get_dynpm_state((rdev)))
|
2012-08-03 23:50:54 +08:00
|
|
|
/* Call the three hooks in the pflip part of rdev's asic table. */
#define radeon_pre_page_flip(rdev, crtc) \
	((rdev)->asic->pflip.pre_page_flip((rdev), (crtc)))
#define radeon_page_flip(rdev, crtc, base) \
	((rdev)->asic->pflip.page_flip((rdev), (crtc), (base)))
#define radeon_post_page_flip(rdev, crtc) \
	((rdev)->asic->pflip.post_page_flip((rdev), (crtc)))

/* Call the display.wait_for_vblank and mc_wait_for_idle hooks from the same table. */
#define radeon_wait_for_vblank(rdev, crtc) \
	((rdev)->asic->display.wait_for_vblank((rdev), (crtc)))
#define radeon_mc_wait_for_idle(rdev) \
	((rdev)->asic->mc_wait_for_idle((rdev)))
|
2009-06-05 20:42:42 +08:00
|
|
|
|
2009-09-11 03:46:48 +08:00
|
|
|
/* Common functions */
|
2010-01-13 22:16:38 +08:00
|
|
|
/* AGP */
|
2010-03-09 22:45:12 +08:00
|
|
|
extern int radeon_gpu_reset(struct radeon_device *rdev);
|
2010-01-13 22:16:38 +08:00
|
|
|
extern void radeon_agp_disable(struct radeon_device *rdev);
|
2009-09-11 21:55:33 +08:00
|
|
|
extern int radeon_modeset_init(struct radeon_device *rdev);
|
|
|
|
extern void radeon_modeset_fini(struct radeon_device *rdev);
|
2009-09-11 21:35:22 +08:00
|
|
|
extern bool radeon_card_posted(struct radeon_device *rdev);
|
2010-03-17 08:54:38 +08:00
|
|
|
extern void radeon_update_bandwidth_info(struct radeon_device *rdev);
|
2010-03-31 12:33:27 +08:00
|
|
|
extern void radeon_update_display_priority(struct radeon_device *rdev);
|
2009-12-01 12:06:31 +08:00
|
|
|
extern bool radeon_boot_test_post_card(struct radeon_device *rdev);
|
2009-09-11 21:55:33 +08:00
|
|
|
extern void radeon_scratch_init(struct radeon_device *rdev);
|
2010-08-28 06:25:25 +08:00
|
|
|
extern void radeon_wb_fini(struct radeon_device *rdev);
|
|
|
|
extern int radeon_wb_init(struct radeon_device *rdev);
|
|
|
|
extern void radeon_wb_disable(struct radeon_device *rdev);
|
2009-09-11 21:55:33 +08:00
|
|
|
extern void radeon_surface_init(struct radeon_device *rdev);
|
|
|
|
extern int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data);
|
2009-10-01 16:20:52 +08:00
|
|
|
extern void radeon_legacy_set_clock_gating(struct radeon_device *rdev, int enable);
|
2009-09-29 00:34:43 +08:00
|
|
|
extern void radeon_atom_set_clock_gating(struct radeon_device *rdev, int enable);
|
2009-12-07 22:52:58 +08:00
|
|
|
extern void radeon_ttm_placement_from_domain(struct radeon_bo *rbo, u32 domain);
|
2009-12-15 04:02:09 +08:00
|
|
|
extern bool radeon_ttm_bo_is_radeon_bo(struct ttm_buffer_object *bo);
|
drm/radeon/kms: simplify memory controller setup V2
Get rid of _location and use _start/_end also simplify the
computation of vram_start|end & gtt_start|end. For R1XX-R2XX
we place VRAM at the same address of PCI aperture, those GPU
shouldn't have much memory and seems to behave better when
setup that way. For R3XX and newer we place VRAM at 0. For
R6XX-R7XX AGP we place VRAM before or after the AGP aperture; this
might limit the VRAM size but it's very unlikely.
For IGP we don't change the VRAM placement.
Tested on (compiz,quake3,suspend/resume):
PCI/PCIE:RV280,R420,RV515,RV570,RV610,RV710
AGP:RV100,RV280,R420,RV350,RV620(RPB*),RV730
IGP:RS480(RPB*),RS690,RS780(RPB*),RS880
RPB: resume previously broken
V2 correct commit message to reflect more accurately the bug
and move VRAM placement to 0 for most of the GPU to avoid
limiting VRAM.
Signed-off-by: Jerome Glisse <jglisse@redhat.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
2010-02-18 05:54:29 +08:00
|
|
|
extern void radeon_vram_location(struct radeon_device *rdev, struct radeon_mc *mc, u64 base);
|
|
|
|
extern void radeon_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
|
2010-02-01 13:38:10 +08:00
|
|
|
extern int radeon_resume_kms(struct drm_device *dev);
|
|
|
|
extern int radeon_suspend_kms(struct drm_device *dev, pm_message_t state);
|
2011-03-14 07:47:24 +08:00
|
|
|
extern void radeon_ttm_set_active_vram_size(struct radeon_device *rdev, u64 size);
|
2009-09-11 03:46:48 +08:00
|
|
|
|
drm/radeon: GPU virtual memory support v22
Virtual address space are per drm client (opener of /dev/drm).
Client are in charge of virtual address space, they need to
map bo into it by calling DRM_RADEON_GEM_VA ioctl.
First 16M of virtual address space is reserved by the kernel.
Once using 2 level page table we should be able to have a small
vram memory footprint for each pt (there would be one pt for all
gart, one for all vram and then one first level for each virtual
address space).
Plan include using the sub allocator for a common vm page table
area and using memcpy to copy vm page table in & out. Or use
a gart object and copy things in & out using dma.
v2: agd5f fixes:
- Add vram base offset for vram pages. The GPU physical address of a
vram page is FB_OFFSET + page offset. FB_OFFSET is 0 on discrete
cards and the physical bus address of the stolen memory on
integrated chips.
- VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR covers all vmid's >= 1
v3: agd5f:
- integrate with the semaphore/multi-ring stuff
v4:
- rebase on top ttm dma & multi-ring stuff
- userspace is now in charge of the address space
- no more specific cs vm ioctl, instead cs ioctl has a new
chunk
v5:
- properly handle mem == NULL case from move_notify callback
- fix the vm cleanup path
v6:
- fix update of page table to only happen on valid mem placement
v7:
- add tlb flush for each vm context
- add flags to define mapping property (readable, writeable, snooped)
- make ring id implicit from ib->fence->ring, up to each asic callback
to then do ring specific scheduling if vm ib scheduling function
v8:
- add query for ib limit and kernel reserved virtual space
- rename vm->size to max_pfn (maximum number of page)
- update gem_va ioctl to also allow unmap operation
- bump kernel version to allow userspace to query for vm support
v9:
- rebuild page table only when bind and incrementaly depending
on bo referenced by cs and that have been moved
- allow virtual address space to grow
- use sa allocator for vram page table
- return invalid when querying vm limit on non cayman GPU
- dump vm fault register on lockup
v10: agd5f:
- Move the vm schedule_ib callback to a standalone function, remove
the callback and use the existing ib_execute callback for VM IBs.
v11:
- rebase on top of latest Linus
v12: agd5f:
- remove spurious backslash
- set IB vm_id to 0 in radeon_ib_get()
v13: agd5f:
- fix handling of RADEON_CHUNK_ID_FLAGS
v14:
- fix va destruction
- fix suspend resume
- forbid bo to have several different va in same vm
v15:
- rebase
v16:
- cleanup left over of vm init/fini
v17: agd5f:
- cs checker
v18: agd5f:
- reworks the CS ioctl to better support multiple rings and
VM. Rather than adding a new chunk id for VM, just re-use the
IB chunk id and add a new flags for VM mode. Also define additional
dwords for the flags chunk id to define the what ring we want to use
(gfx, compute, uvd, etc.) and the priority.
v19:
- fix cs fini in weird case of no ib
- semi working flush fix for ni
- rebase on top of sa allocator changes
v20: agd5f:
- further CS ioctl cleanups from Christian's comments
v21: agd5f:
- integrate CS checker improvements
v22: agd5f:
- final cleanups for release, only allow VM CS on cayman
Signed-off-by: Jerome Glisse <jglisse@redhat.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
2012-01-06 11:11:05 +08:00
|
|
|
/*
|
|
|
|
* vm
|
|
|
|
*/
|
|
|
|
int radeon_vm_manager_init(struct radeon_device *rdev);
|
|
|
|
void radeon_vm_manager_fini(struct radeon_device *rdev);
|
2012-10-09 19:31:18 +08:00
|
|
|
void radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm);
|
drm/radeon: GPU virtual memory support v22
Virtual address space are per drm client (opener of /dev/drm).
Client are in charge of virtual address space, they need to
map bo into it by calling DRM_RADEON_GEM_VA ioctl.
First 16M of virtual address space is reserved by the kernel.
Once using 2 level page table we should be able to have a small
vram memory footprint for each pt (there would be one pt for all
gart, one for all vram and then one first level for each virtual
address space).
Plan include using the sub allocator for a common vm page table
area and using memcpy to copy vm page table in & out. Or use
a gart object and copy things in & out using dma.
v2: agd5f fixes:
- Add vram base offset for vram pages. The GPU physical address of a
vram page is FB_OFFSET + page offset. FB_OFFSET is 0 on discrete
cards and the physical bus address of the stolen memory on
integrated chips.
- VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR covers all vmid's >= 1
v3: agd5f:
- integrate with the semaphore/multi-ring stuff
v4:
- rebase on top ttm dma & multi-ring stuff
- userspace is now in charge of the address space
- no more specific cs vm ioctl, instead cs ioctl has a new
chunk
v5:
- properly handle mem == NULL case from move_notify callback
- fix the vm cleanup path
v6:
- fix update of page table to only happen on valid mem placement
v7:
- add tlb flush for each vm context
- add flags to define mapping property (readable, writeable, snooped)
- make ring id implicit from ib->fence->ring, up to each asic callback
to then do ring specific scheduling if vm ib scheduling function
v8:
- add query for ib limit and kernel reserved virtual space
- rename vm->size to max_pfn (maximum number of page)
- update gem_va ioctl to also allow unmap operation
- bump kernel version to allow userspace to query for vm support
v9:
- rebuild page table only when bind and incrementaly depending
on bo referenced by cs and that have been moved
- allow virtual address space to grow
- use sa allocator for vram page table
- return invalid when querying vm limit on non cayman GPU
- dump vm fault register on lockup
v10: agd5f:
- Move the vm schedule_ib callback to a standalone function, remove
the callback and use the existing ib_execute callback for VM IBs.
v11:
- rebase on top of latest Linus
v12: agd5f:
- remove spurious backslash
- set IB vm_id to 0 in radeon_ib_get()
v13: agd5f:
- fix handling of RADEON_CHUNK_ID_FLAGS
v14:
- fix va destruction
- fix suspend resume
- forbid bo to have several different va in same vm
v15:
- rebase
v16:
- cleanup left over of vm init/fini
v17: agd5f:
- cs checker
v18: agd5f:
- reworks the CS ioctl to better support multiple rings and
VM. Rather than adding a new chunk id for VM, just re-use the
IB chunk id and add a new flags for VM mode. Also define additional
dwords for the flags chunk id to define the what ring we want to use
(gfx, compute, uvd, etc.) and the priority.
v19:
- fix cs fini in weird case of no ib
- semi working flush fix for ni
- rebase on top of sa allocator changes
v20: agd5f:
- further CS ioctl cleanups from Christian's comments
v21: agd5f:
- integrate CS checker improvements
v22: agd5f:
- final cleanups for release, only allow VM CS on cayman
Signed-off-by: Jerome Glisse <jglisse@redhat.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
2012-01-06 11:11:05 +08:00
|
|
|
void radeon_vm_fini(struct radeon_device *rdev, struct radeon_vm *vm);
|
2012-08-10 02:02:28 +08:00
|
|
|
int radeon_vm_alloc_pt(struct radeon_device *rdev, struct radeon_vm *vm);
|
2012-10-09 19:31:19 +08:00
|
|
|
void radeon_vm_add_to_lru(struct radeon_device *rdev, struct radeon_vm *vm);
|
2012-08-09 22:21:08 +08:00
|
|
|
struct radeon_fence *radeon_vm_grab_id(struct radeon_device *rdev,
|
|
|
|
struct radeon_vm *vm, int ring);
|
|
|
|
void radeon_vm_fence(struct radeon_device *rdev,
|
|
|
|
struct radeon_vm *vm,
|
|
|
|
struct radeon_fence *fence);
|
2012-09-18 01:36:18 +08:00
|
|
|
uint64_t radeon_vm_map_gart(struct radeon_device *rdev, uint64_t addr);
|
drm/radeon: GPU virtual memory support v22
Virtual address space are per drm client (opener of /dev/drm).
Client are in charge of virtual address space, they need to
map bo into it by calling DRM_RADEON_GEM_VA ioctl.
First 16M of virtual address space is reserved by the kernel.
Once using 2 level page table we should be able to have a small
vram memory footprint for each pt (there would be one pt for all
gart, one for all vram and then one first level for each virtual
address space).
Plan include using the sub allocator for a common vm page table
area and using memcpy to copy vm page table in & out. Or use
a gart object and copy things in & out using dma.
v2: agd5f fixes:
- Add vram base offset for vram pages. The GPU physical address of a
vram page is FB_OFFSET + page offset. FB_OFFSET is 0 on discrete
cards and the physical bus address of the stolen memory on
integrated chips.
- VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR covers all vmid's >= 1
v3: agd5f:
- integrate with the semaphore/multi-ring stuff
v4:
- rebase on top ttm dma & multi-ring stuff
- userspace is now in charge of the address space
- no more specific cs vm ioctl, instead cs ioctl has a new
chunk
v5:
- properly handle mem == NULL case from move_notify callback
- fix the vm cleanup path
v6:
- fix update of page table to only happen on valid mem placement
v7:
- add tlb flush for each vm context
- add flags to define mapping property (readable, writeable, snooped)
- make ring id implicit from ib->fence->ring, up to each asic callback
to then do ring specific scheduling if vm ib scheduling function
v8:
- add query for ib limit and kernel reserved virtual space
- rename vm->size to max_pfn (maximum number of page)
- update gem_va ioctl to also allow unmap operation
- bump kernel version to allow userspace to query for vm support
v9:
- rebuild page table only when bind and incrementaly depending
on bo referenced by cs and that have been moved
- allow virtual address space to grow
- use sa allocator for vram page table
- return invalid when querying vm limit on non cayman GPU
- dump vm fault register on lockup
v10: agd5f:
- Move the vm schedule_ib callback to a standalone function, remove
the callback and use the existing ib_execute callback for VM IBs.
v11:
- rebase on top of latest Linus
v12: agd5f:
- remove spurious backslash
- set IB vm_id to 0 in radeon_ib_get()
v13: agd5f:
- fix handling of RADEON_CHUNK_ID_FLAGS
v14:
- fix va destruction
- fix suspend resume
- forbid bo to have several different va in same vm
v15:
- rebase
v16:
- cleanup left over of vm init/fini
v17: agd5f:
- cs checker
v18: agd5f:
- reworks the CS ioctl to better support multiple rings and
VM. Rather than adding a new chunk id for VM, just re-use the
IB chunk id and add a new flags for VM mode. Also define additional
dwords for the flags chunk id to define the what ring we want to use
(gfx, compute, uvd, etc.) and the priority.
v19:
- fix cs fini in weird case of no ib
- semi working flush fix for ni
- rebase on top of sa allocator changes
v20: agd5f:
- further CS ioctl cleanups from Christian's comments
v21: agd5f:
- integrate CS checker improvements
v22: agd5f:
- final cleanups for release, only allow VM CS on cayman
Signed-off-by: Jerome Glisse <jglisse@redhat.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
2012-01-06 11:11:05 +08:00
|
|
|
int radeon_vm_bo_update_pte(struct radeon_device *rdev,
|
|
|
|
struct radeon_vm *vm,
|
|
|
|
struct radeon_bo *bo,
|
|
|
|
struct ttm_mem_reg *mem);
|
|
|
|
void radeon_vm_bo_invalidate(struct radeon_device *rdev,
|
|
|
|
struct radeon_bo *bo);
|
2012-09-11 22:10:00 +08:00
|
|
|
struct radeon_bo_va *radeon_vm_bo_find(struct radeon_vm *vm,
|
|
|
|
struct radeon_bo *bo);
|
2012-09-11 22:10:04 +08:00
|
|
|
struct radeon_bo_va *radeon_vm_bo_add(struct radeon_device *rdev,
|
|
|
|
struct radeon_vm *vm,
|
|
|
|
struct radeon_bo *bo);
|
|
|
|
int radeon_vm_bo_set_addr(struct radeon_device *rdev,
|
|
|
|
struct radeon_bo_va *bo_va,
|
|
|
|
uint64_t offset,
|
|
|
|
uint32_t flags);
|
drm/radeon: GPU virtual memory support v22
Virtual address space are per drm client (opener of /dev/drm).
Client are in charge of virtual address space, they need to
map bo into it by calling DRM_RADEON_GEM_VA ioctl.
First 16M of virtual address space is reserved by the kernel.
Once using 2 level page table we should be able to have a small
vram memory footprint for each pt (there would be one pt for all
gart, one for all vram and then one first level for each virtual
address space).
Plan include using the sub allocator for a common vm page table
area and using memcpy to copy vm page table in & out. Or use
a gart object and copy things in & out using dma.
v2: agd5f fixes:
- Add vram base offset for vram pages. The GPU physical address of a
vram page is FB_OFFSET + page offset. FB_OFFSET is 0 on discrete
cards and the physical bus address of the stolen memory on
integrated chips.
- VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR covers all vmid's >= 1
v3: agd5f:
- integrate with the semaphore/multi-ring stuff
v4:
- rebase on top ttm dma & multi-ring stuff
- userspace is now in charge of the address space
- no more specific cs vm ioctl, instead cs ioctl has a new
chunk
v5:
- properly handle mem == NULL case from move_notify callback
- fix the vm cleanup path
v6:
- fix update of page table to only happen on valid mem placement
v7:
- add tlb flush for each vm context
- add flags to define mapping property (readable, writeable, snooped)
- make ring id implicit from ib->fence->ring, up to each asic callback
to then do ring specific scheduling if vm ib scheduling function
v8:
- add query for ib limit and kernel reserved virtual space
- rename vm->size to max_pfn (maximum number of page)
- update gem_va ioctl to also allow unmap operation
- bump kernel version to allow userspace to query for vm support
v9:
- rebuild page table only when bind and incrementaly depending
on bo referenced by cs and that have been moved
- allow virtual address space to grow
- use sa allocator for vram page table
- return invalid when querying vm limit on non cayman GPU
- dump vm fault register on lockup
v10: agd5f:
- Move the vm schedule_ib callback to a standalone function, remove
the callback and use the existing ib_execute callback for VM IBs.
v11:
- rebase on top of latest Linus
v12: agd5f:
- remove spurious backslash
- set IB vm_id to 0 in radeon_ib_get()
v13: agd5f:
- fix handling of RADEON_CHUNK_ID_FLAGS
v14:
- fix va destruction
- fix suspend resume
- forbid bo to have several different va in same vm
v15:
- rebase
v16:
- cleanup left over of vm init/fini
v17: agd5f:
- cs checker
v18: agd5f:
- reworks the CS ioctl to better support multiple rings and
VM. Rather than adding a new chunk id for VM, just re-use the
IB chunk id and add a new flags for VM mode. Also define additional
dwords for the flags chunk id to define the what ring we want to use
(gfx, compute, uvd, etc.) and the priority.
v19:
- fix cs fini in weird case of no ib
- semi working flush fix for ni
- rebase on top of sa allocator changes
v20: agd5f:
- further CS ioctl cleanups from Christian's comments
v21: agd5f:
- integrate CS checker improvements
v22: agd5f:
- final cleanups for release, only allow VM CS on cayman
Signed-off-by: Jerome Glisse <jglisse@redhat.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
2012-01-06 11:11:05 +08:00
|
|
|
int radeon_vm_bo_rmv(struct radeon_device *rdev,
|
2012-09-11 22:10:04 +08:00
|
|
|
struct radeon_bo_va *bo_va);
|
drm/radeon: GPU virtual memory support v22
Virtual address space are per drm client (opener of /dev/drm).
Client are in charge of virtual address space, they need to
map bo into it by calling DRM_RADEON_GEM_VA ioctl.
First 16M of virtual address space is reserved by the kernel.
Once using 2 level page table we should be able to have a small
vram memory footprint for each pt (there would be one pt for all
gart, one for all vram and then one first level for each virtual
address space).
Plan include using the sub allocator for a common vm page table
area and using memcpy to copy vm page table in & out. Or use
a gart object and copy things in & out using dma.
v2: agd5f fixes:
- Add vram base offset for vram pages. The GPU physical address of a
vram page is FB_OFFSET + page offset. FB_OFFSET is 0 on discrete
cards and the physical bus address of the stolen memory on
integrated chips.
- VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR covers all vmid's >= 1
v3: agd5f:
- integrate with the semaphore/multi-ring stuff
v4:
- rebase on top ttm dma & multi-ring stuff
- userspace is now in charge of the address space
- no more specific cs vm ioctl, instead cs ioctl has a new
chunk
v5:
- properly handle mem == NULL case from move_notify callback
- fix the vm cleanup path
v6:
- fix update of page table to only happen on valid mem placement
v7:
- add tlb flush for each vm context
- add flags to define mapping property (readable, writeable, snooped)
- make ring id implicit from ib->fence->ring, up to each asic callback
to then do ring specific scheduling if vm ib scheduling function
v8:
- add query for ib limit and kernel reserved virtual space
- rename vm->size to max_pfn (maximum number of page)
- update gem_va ioctl to also allow unmap operation
- bump kernel version to allow userspace to query for vm support
v9:
- rebuild page table only when bind and incrementaly depending
on bo referenced by cs and that have been moved
- allow virtual address space to grow
- use sa allocator for vram page table
- return invalid when querying vm limit on non cayman GPU
- dump vm fault register on lockup
v10: agd5f:
- Move the vm schedule_ib callback to a standalone function, remove
the callback and use the existing ib_execute callback for VM IBs.
v11:
- rebase on top of latest Linus
v12: agd5f:
- remove spurious backslash
- set IB vm_id to 0 in radeon_ib_get()
v13: agd5f:
- fix handling of RADEON_CHUNK_ID_FLAGS
v14:
- fix va destruction
- fix suspend resume
- forbid bo to have several different va in same vm
v15:
- rebase
v16:
- cleanup left over of vm init/fini
v17: agd5f:
- cs checker
v18: agd5f:
- reworks the CS ioctl to better support multiple rings and
VM. Rather than adding a new chunk id for VM, just re-use the
IB chunk id and add a new flags for VM mode. Also define additional
dwords for the flags chunk id to define the what ring we want to use
(gfx, compute, uvd, etc.) and the priority.
v19:
- fix cs fini in weird case of no ib
- semi working flush fix for ni
- rebase on top of sa allocator changes
v20: agd5f:
- further CS ioctl cleanups from Christian's comments
v21: agd5f:
- integrate CS checker improvements
v22: agd5f:
- final cleanups for release, only allow VM CS on cayman
Signed-off-by: Jerome Glisse <jglisse@redhat.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
2012-01-06 11:11:05 +08:00
|
|
|
|
2012-03-30 20:59:57 +08:00
|
|
|
/* audio */
|
|
|
|
void r600_audio_update_hdmi(struct work_struct *work);
|
drm/radeon: GPU virtual memory support v22
Virtual address spaces are per drm client (opener of /dev/drm).
Clients are in charge of their virtual address space; they need to
map bos into it by calling the DRM_RADEON_GEM_VA ioctl.
First 16M of virtual address space is reserved by the kernel.
Once using 2 level page table we should be able to have a small
vram memory footprint for each pt (there would be one pt for all
gart, one for all vram and then one first level for each virtual
address space).
Plans include using the sub allocator for a common vm page table
area and using memcpy to copy vm page table in & out. Or use
a gart object and copy things in & out using dma.
v2: agd5f fixes:
- Add vram base offset for vram pages. The GPU physical address of a
vram page is FB_OFFSET + page offset. FB_OFFSET is 0 on discrete
cards and the physical bus address of the stolen memory on
integrated chips.
- VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR covers all vmid's >= 1
v3: agd5f:
- integrate with the semaphore/multi-ring stuff
v4:
- rebase on top ttm dma & multi-ring stuff
- userspace is now in charge of the address space
- no more specific cs vm ioctl, instead cs ioctl has a new
chunk
v5:
- properly handle mem == NULL case from move_notify callback
- fix the vm cleanup path
v6:
- fix update of page table to only happen on valid mem placement
v7:
- add tlb flush for each vm context
- add flags to define mapping property (readable, writeable, snooped)
- make ring id implicit from ib->fence->ring, up to each asic callback
to then do ring specific scheduling if vm ib scheduling function
v8:
- add query for ib limit and kernel reserved virtual space
- rename vm->size to max_pfn (maximum number of pages)
- update gem_va ioctl to also allow unmap operation
- bump kernel version to allow userspace to query for vm support
v9:
- rebuild page table only on bind, and incrementally depending
on bos referenced by cs that have been moved
- allow virtual address space to grow
- use sa allocator for vram page table
- return invalid when querying vm limit on non cayman GPU
- dump vm fault register on lockup
v10: agd5f:
- Move the vm schedule_ib callback to a standalone function, remove
the callback and use the existing ib_execute callback for VM IBs.
v11:
- rebase on top of latest Linus
v12: agd5f:
- remove spurious backslash
- set IB vm_id to 0 in radeon_ib_get()
v13: agd5f:
- fix handling of RADEON_CHUNK_ID_FLAGS
v14:
- fix va destruction
- fix suspend resume
- forbid bo to have several different va in same vm
v15:
- rebase
v16:
- cleanup left over of vm init/fini
v17: agd5f:
- cs checker
v18: agd5f:
- reworks the CS ioctl to better support multiple rings and
VM. Rather than adding a new chunk id for VM, just re-use the
IB chunk id and add a new flags for VM mode. Also define additional
dwords for the flags chunk id to define what ring we want to use
(gfx, compute, uvd, etc.) and the priority.
v19:
- fix cs fini in weird case of no ib
- semi working flush fix for ni
- rebase on top of sa allocator changes
v20: agd5f:
- further CS ioctl cleanups from Christian's comments
v21: agd5f:
- integrate CS checker improvements
v22: agd5f:
- final cleanups for release, only allow VM CS on cayman
Signed-off-by: Jerome Glisse <jglisse@redhat.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
2012-01-06 11:11:05 +08:00
|
|
|
|
2011-10-28 22:30:02 +08:00
|
|
|
/*
|
|
|
|
* R600 vram scratch functions
|
|
|
|
*/
|
|
|
|
int r600_vram_scratch_init(struct radeon_device *rdev);
|
|
|
|
void r600_vram_scratch_fini(struct radeon_device *rdev);
|
|
|
|
|
2011-12-17 06:03:42 +08:00
|
|
|
/*
|
|
|
|
* r600 cs checking helper
|
|
|
|
*/
|
|
|
|
unsigned r600_mip_minify(unsigned size, unsigned level);
|
|
|
|
bool r600_fmt_is_valid_color(u32 format);
|
|
|
|
bool r600_fmt_is_valid_texture(u32 format, enum radeon_family family);
|
|
|
|
int r600_fmt_get_blocksize(u32 format);
|
|
|
|
int r600_fmt_get_nblocksx(u32 format, u32 w);
|
|
|
|
int r600_fmt_get_nblocksy(u32 format, u32 h);
|
|
|
|
|
2011-02-19 00:59:19 +08:00
|
|
|
/*
|
|
|
|
* r600 functions used by radeon_encoder.c
|
|
|
|
*/
|
2012-04-30 21:44:54 +08:00
|
|
|
/*
 * Value bundle returned by r600_hdmi_acr() for a given clock.
 * Holds one n/cts pair for each of three groups named 32khz, 44_1khz
 * and 48khz.
 * NOTE(review): presumably these are the HDMI Audio Clock Regeneration
 * N and CTS values per audio sample rate; the unit of 'clock' is not
 * visible here - confirm against r600_hdmi_acr().
 */
struct radeon_hdmi_acr {
|
|
|
|
u32 clock;
|
|
|
|
|
|
|
|
int n_32khz;
|
|
|
|
int cts_32khz;
|
|
|
|
|
|
|
|
int n_44_1khz;
|
|
|
|
int cts_44_1khz;
|
|
|
|
|
|
|
|
int n_48khz;
|
|
|
|
int cts_48khz;
|
|
|
|
|
|
|
|
};
|
|
|
|
|
2012-05-06 23:29:44 +08:00
|
|
|
extern struct radeon_hdmi_acr r600_hdmi_acr(uint32_t clock);
|
|
|
|
|
2010-03-09 06:14:01 +08:00
|
|
|
extern void r600_hdmi_enable(struct drm_encoder *encoder);
|
|
|
|
extern void r600_hdmi_disable(struct drm_encoder *encoder);
|
2009-10-12 05:49:13 +08:00
|
|
|
extern void r600_hdmi_setmode(struct drm_encoder *encoder, struct drm_display_mode *mode);
|
2012-06-01 07:00:25 +08:00
|
|
|
extern u32 r6xx_remap_render_backend(struct radeon_device *rdev,
|
|
|
|
u32 tiling_pipe_num,
|
|
|
|
u32 max_rb_num,
|
|
|
|
u32 total_max_rb_num,
|
|
|
|
u32 enabled_rb_mask);
|
2010-03-25 01:36:43 +08:00
|
|
|
|
2012-05-06 23:29:44 +08:00
|
|
|
/*
|
|
|
|
* evergreen functions used by radeon_encoder.c
|
|
|
|
*/
|
|
|
|
|
|
|
|
extern void evergreen_hdmi_setmode(struct drm_encoder *encoder, struct drm_display_mode *mode);
|
|
|
|
|
2011-01-07 10:19:31 +08:00
|
|
|
extern int ni_init_microcode(struct radeon_device *rdev);
|
2011-03-03 09:07:34 +08:00
|
|
|
extern int ni_mc_load_microcode(struct radeon_device *rdev);
|
2011-01-07 10:19:31 +08:00
|
|
|
|
2012-08-01 05:14:35 +08:00
|
|
|
/* radeon_acpi.c */
|
|
|
|
#if defined(CONFIG_ACPI)
|
|
|
|
extern int radeon_acpi_init(struct radeon_device *rdev);
|
|
|
|
extern void radeon_acpi_fini(struct radeon_device *rdev);
|
|
|
|
#else
|
|
|
|
/*
 * Stub used when CONFIG_ACPI is not defined: there is nothing to
 * initialize, so it ignores rdev and always returns 0.
 */
static inline int radeon_acpi_init(struct radeon_device *rdev)
{
	return 0;
}
|
|
|
|
/*
 * Stub used when CONFIG_ACPI is not defined: there is nothing to
 * tear down, so it ignores rdev and does nothing.
 */
static inline void radeon_acpi_fini(struct radeon_device *rdev)
{
}
|
|
|
|
#endif
|
2010-07-06 23:40:24 +08:00
|
|
|
|
2013-01-03 07:27:41 +08:00
|
|
|
int radeon_cs_packet_parse(struct radeon_cs_parser *p,
|
|
|
|
struct radeon_cs_packet *pkt,
|
|
|
|
unsigned idx);
|
2013-01-03 07:27:42 +08:00
|
|
|
bool radeon_cs_packet_next_is_pkt3_nop(struct radeon_cs_parser *p);
|
2013-01-03 07:27:45 +08:00
|
|
|
void radeon_cs_dump_packet(struct radeon_cs_parser *p,
|
|
|
|
struct radeon_cs_packet *pkt);
|
2013-01-03 07:27:43 +08:00
|
|
|
int r600_cs_common_vline_parse(struct radeon_cs_parser *p,
|
|
|
|
uint32_t *vline_start_end,
|
|
|
|
uint32_t *vline_status);
|
2013-01-03 07:27:41 +08:00
|
|
|
|
2009-11-20 21:29:23 +08:00
|
|
|
#include "radeon_object.h"
|
|
|
|
|
2009-06-05 20:42:42 +08:00
|
|
|
#endif
|