2009-06-05 20:42:42 +08:00
|
|
|
/*
 * Copyright 2008 Advanced Micro Devices, Inc.
 * Copyright 2008 Red Hat Inc.
 * Copyright 2009 Jerome Glisse.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Dave Airlie
 *          Alex Deucher
 *          Jerome Glisse
 */
|
|
|
|
#ifndef __RADEON_H__
|
|
|
|
#define __RADEON_H__
|
|
|
|
|
|
|
|
/* TODO: Here are the things that need to be done:
 *	- surface allocator & initializer : (bit like scratch reg) should
 *	  initialize HDP_ stuff on RS600, R600, R700 hw, well anything
 *	  related to surface
 *	- WB : write back stuff (do it bit like scratch reg things)
 *	- Vblank : look at Jesse's rework and what we should do
 *	- r600/r700: gart & cp
 *	- cs : clean cs ioctl use bitmap & things like that.
 *	- power management stuff
 *	- Barrier in gart code
 *	- Unmappable vram ?
 *	- TESTING, TESTING, TESTING
 */
|
|
|
|
|
2009-09-29 00:34:43 +08:00
|
|
|
/* Initialization path:
 * We expect that acceleration initialization might fail for various
 * reasons even though we work hard to make it work on most
 * configurations. In order to still have a working userspace in such
 * situations the init path must succeed up to the memory controller
 * initialization point. Failures before this point are considered
 * fatal errors. Here is the init callchain:
 *	radeon_device_init  perform common structure, mutex initialization
 *	asic_init           setup the GPU memory layout and perform all
 *			    one time initialization (failures in this
 *			    function are considered fatal)
 *	asic_startup        setup the GPU acceleration; in order to
 *			    follow the guideline the first thing this
 *			    function should do is set up the GPU
 *			    memory controller (only MC setup failures
 *			    are considered fatal)
 */
|
|
|
|
|
2011-07-27 07:09:06 +08:00
|
|
|
#include <linux/atomic.h>
|
2009-06-05 20:42:42 +08:00
|
|
|
#include <linux/wait.h>
|
|
|
|
#include <linux/list.h>
|
|
|
|
#include <linux/kref.h>
|
|
|
|
|
2009-11-20 21:29:23 +08:00
|
|
|
#include <ttm/ttm_bo_api.h>
|
|
|
|
#include <ttm/ttm_bo_driver.h>
|
|
|
|
#include <ttm/ttm_placement.h>
|
|
|
|
#include <ttm/ttm_module.h>
|
2010-11-17 20:38:32 +08:00
|
|
|
#include <ttm/ttm_execbuf_util.h>
|
2009-11-20 21:29:23 +08:00
|
|
|
|
2009-09-22 06:50:10 +08:00
|
|
|
#include "radeon_family.h"
|
2009-06-05 20:42:42 +08:00
|
|
|
#include "radeon_mode.h"
|
|
|
|
#include "radeon_reg.h"
|
|
|
|
|
|
|
|
/*
 * Module parameters.
 *
 * These are only declared here (extern); this header does not define them.
 */
extern int radeon_no_wb;
extern int radeon_modeset;
extern int radeon_dynclks;
extern int radeon_r4xx_atom;
extern int radeon_agpmode;
extern int radeon_vram_limit;
extern int radeon_gart_size;
extern int radeon_benchmarking;
extern int radeon_testing;
extern int radeon_connector_table;
extern int radeon_tv;
extern int radeon_audio;
extern int radeon_disp_priority;
extern int radeon_hw_i2c;
extern int radeon_pcie_gen2;
extern int radeon_msi;
|
2009-06-05 20:42:42 +08:00
|
|
|
|
|
|
|
/*
 * Copied from radeon_drv.h so we don't have to include both headers and
 * end up with conflicting symbols.
 */
#define RADEON_MAX_USEC_TIMEOUT		100000	/* 100 ms */
#define RADEON_FENCE_JIFFIES_TIMEOUT	(HZ / 2)
/* RADEON_IB_POOL_SIZE must be a power of 2 */
#define RADEON_IB_POOL_SIZE		16
#define RADEON_DEBUGFS_MAX_COMPONENTS	32
#define RADEONFB_CONN_LIMIT		4
#define RADEON_BIOS_NUM_SCRATCH		8
|
2009-06-05 20:42:42 +08:00
|
|
|
|
2011-11-18 09:13:28 +08:00
|
|
|
/* max number of rings */
#define RADEON_NUM_RINGS		3

/* internal ring indices */
/* r1xx+ has gfx CP ring */
#define RADEON_RING_TYPE_GFX_INDEX	0

/* cayman has 2 compute CP rings */
#define CAYMAN_RING_TYPE_CP1_INDEX	1
#define CAYMAN_RING_TYPE_CP2_INDEX	2
|
|
|
|
|
drm/radeon: GPU virtual memory support v22
Virtual address space are per drm client (opener of /dev/drm).
Client are in charge of virtual address space, they need to
map bo into it by calling DRM_RADEON_GEM_VA ioctl.
First 16M of virtual address space is reserved by the kernel.
Once using 2 level page table we should be able to have a small
vram memory footprint for each pt (there would be one pt for all
gart, one for all vram and then one first level for each virtual
address space).
Plan include using the sub allocator for a common vm page table
area and using memcpy to copy vm page table in & out. Or use
a gart object and copy things in & out using dma.
v2: agd5f fixes:
- Add vram base offset for vram pages. The GPU physical address of a
vram page is FB_OFFSET + page offset. FB_OFFSET is 0 on discrete
cards and the physical bus address of the stolen memory on
integrated chips.
- VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR covers all vmid's >= 1
v3: agd5f:
- integrate with the semaphore/multi-ring stuff
v4:
- rebase on top ttm dma & multi-ring stuff
- userspace is now in charge of the address space
- no more specific cs vm ioctl, instead cs ioctl has a new
chunk
v5:
- properly handle mem == NULL case from move_notify callback
- fix the vm cleanup path
v6:
- fix update of page table to only happen on valid mem placement
v7:
- add tlb flush for each vm context
- add flags to define mapping property (readable, writeable, snooped)
- make ring id implicit from ib->fence->ring, up to each asic callback
to then do ring specific scheduling if vm ib scheduling function
v8:
- add query for ib limit and kernel reserved virtual space
- rename vm->size to max_pfn (maximum number of page)
- update gem_va ioctl to also allow unmap operation
- bump kernel version to allow userspace to query for vm support
v9:
- rebuild page table only when bind and incrementaly depending
on bo referenced by cs and that have been moved
- allow virtual address space to grow
- use sa allocator for vram page table
- return invalid when querying vm limit on non cayman GPU
- dump vm fault register on lockup
v10: agd5f:
- Move the vm schedule_ib callback to a standalone function, remove
the callback and use the existing ib_execute callback for VM IBs.
v11:
- rebase on top of lastest Linus
v12: agd5f:
- remove spurious backslash
- set IB vm_id to 0 in radeon_ib_get()
v13: agd5f:
- fix handling of RADEON_CHUNK_ID_FLAGS
v14:
- fix va destruction
- fix suspend resume
- forbid bo to have several different va in same vm
v15:
- rebase
v16:
- cleanup left over of vm init/fini
v17: agd5f:
- cs checker
v18: agd5f:
- reworks the CS ioctl to better support multiple rings and
VM. Rather than adding a new chunk id for VM, just re-use the
IB chunk id and add a new flags for VM mode. Also define additional
dwords for the flags chunk id to define the what ring we want to use
(gfx, compute, uvd, etc.) and the priority.
v19:
- fix cs fini in weird case of no ib
- semi working flush fix for ni
- rebase on top of sa allocator changes
v20: agd5f:
- further CS ioctl cleanups from Christian's comments
v21: agd5f:
- integrate CS checker improvements
v22: agd5f:
- final cleanups for release, only allow VM CS on cayman
Signed-off-by: Jerome Glisse <jglisse@redhat.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
2012-01-06 11:11:05 +08:00
|
|
|
/* hardcode those limits for now */
/*
 * NOTE(review): the changelog that introduced these mentions a 16M kernel
 * reservation, while this value evaluates to 8 MiB -- confirm which is
 * intended before relying on either number.
 */
#define RADEON_VA_RESERVED_SIZE		(8 << 20)	/* 8 MiB */
#define RADEON_IB_VM_MAX_SIZE		(64 << 10)	/* 64 KiB */
|
|
|
|
|
2009-06-05 20:42:42 +08:00
|
|
|
/*
 * Errata workarounds.
 *
 * Each enumerator is a distinct single-bit value, so several can be OR-ed
 * together in one word.
 */
enum radeon_pll_errata {
	CHIP_ERRATA_R300_CG		= 0x00000001,
	CHIP_ERRATA_PLL_DUMMYREADS	= 0x00000002,
	CHIP_ERRATA_PLL_DELAY		= 0x00000004
};
|
|
|
|
|
|
|
|
|
|
|
|
/* Forward declaration: the declarations that follow only use pointers to it. */
struct radeon_device;
|
|
|
|
|
|
|
|
|
|
|
|
/*
 * BIOS.
 */
#define ATRM_BIOS_PAGE 4096

/* Only pointers to these types are used below, so forward declarations suffice. */
struct pci_dev;
struct radeon_device;

#if defined(CONFIG_VGA_SWITCHEROO)
bool radeon_atrm_supported(struct pci_dev *pdev);
int radeon_atrm_get_bios_chunk(uint8_t *bios, int offset, int len);
#else
/* Stub for builds without CONFIG_VGA_SWITCHEROO: always reports false. */
static inline bool radeon_atrm_supported(struct pci_dev *pdev)
{
	return false;
}

/* Stub for builds without CONFIG_VGA_SWITCHEROO: always fails with -EINVAL. */
static inline int radeon_atrm_get_bios_chunk(uint8_t *bios, int offset, int len)
{
	return -EINVAL;
}
#endif
bool radeon_get_bios(struct radeon_device *rdev);
|
|
|
|
|
2009-09-08 08:10:24 +08:00
|
|
|
|
2012-01-24 00:52:15 +08:00
|
|
|
/*
|
|
|
|
* Mutex which allows recursive locking from the same process.
|
|
|
|
*/
|
|
|
|
struct radeon_mutex {
|
|
|
|
struct mutex mutex;
|
|
|
|
struct task_struct *owner;
|
|
|
|
int level;
|
|
|
|
};
|
|
|
|
|
|
|
|
static inline void radeon_mutex_init(struct radeon_mutex *mutex)
|
|
|
|
{
|
|
|
|
mutex_init(&mutex->mutex);
|
|
|
|
mutex->owner = NULL;
|
|
|
|
mutex->level = 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline void radeon_mutex_lock(struct radeon_mutex *mutex)
|
|
|
|
{
|
|
|
|
if (mutex_trylock(&mutex->mutex)) {
|
|
|
|
/* The mutex was unlocked before, so it's ours now */
|
|
|
|
mutex->owner = current;
|
|
|
|
} else if (mutex->owner != current) {
|
|
|
|
/* Another process locked the mutex, take it */
|
|
|
|
mutex_lock(&mutex->mutex);
|
|
|
|
mutex->owner = current;
|
|
|
|
}
|
|
|
|
/* Otherwise the mutex was already locked by this process */
|
|
|
|
|
|
|
|
mutex->level++;
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline void radeon_mutex_unlock(struct radeon_mutex *mutex)
|
|
|
|
{
|
|
|
|
if (--mutex->level > 0)
|
|
|
|
return;
|
|
|
|
|
|
|
|
mutex->owner = NULL;
|
|
|
|
mutex_unlock(&mutex->mutex);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2009-06-05 20:42:42 +08:00
|
|
|
/*
|
2009-09-08 08:10:24 +08:00
|
|
|
* Dummy page
|
2009-06-05 20:42:42 +08:00
|
|
|
*/
|
2009-09-08 08:10:24 +08:00
|
|
|
struct radeon_dummy_page {
|
|
|
|
struct page *page;
|
|
|
|
dma_addr_t addr;
|
|
|
|
};
|
|
|
|
int radeon_dummy_page_init(struct radeon_device *rdev);
|
|
|
|
void radeon_dummy_page_fini(struct radeon_device *rdev);
|
|
|
|
|
2009-06-05 20:42:42 +08:00
|
|
|
|
2009-09-08 08:10:24 +08:00
|
|
|
/*
|
|
|
|
* Clocks
|
|
|
|
*/
|
2009-06-05 20:42:42 +08:00
|
|
|
struct radeon_clock {
|
|
|
|
struct radeon_pll p1pll;
|
|
|
|
struct radeon_pll p2pll;
|
2010-01-13 06:54:34 +08:00
|
|
|
struct radeon_pll dcpll;
|
2009-06-05 20:42:42 +08:00
|
|
|
struct radeon_pll spll;
|
|
|
|
struct radeon_pll mpll;
|
|
|
|
/* 10 Khz units */
|
|
|
|
uint32_t default_mclk;
|
|
|
|
uint32_t default_sclk;
|
2010-01-13 06:54:34 +08:00
|
|
|
uint32_t default_dispclk;
|
|
|
|
uint32_t dp_extclk;
|
2011-06-09 01:01:11 +08:00
|
|
|
uint32_t max_pixel_clock;
|
2009-06-05 20:42:42 +08:00
|
|
|
};
|
|
|
|
|
2009-11-03 07:53:02 +08:00
|
|
|
/*
|
|
|
|
* Power management
|
|
|
|
*/
|
|
|
|
int radeon_pm_init(struct radeon_device *rdev);
|
2010-03-11 23:01:17 +08:00
|
|
|
void radeon_pm_fini(struct radeon_device *rdev);
|
2009-12-23 06:02:16 +08:00
|
|
|
void radeon_pm_compute_clocks(struct radeon_device *rdev);
|
2010-05-08 03:10:16 +08:00
|
|
|
void radeon_pm_suspend(struct radeon_device *rdev);
|
|
|
|
void radeon_pm_resume(struct radeon_device *rdev);
|
2009-12-29 02:58:44 +08:00
|
|
|
void radeon_combios_get_power_modes(struct radeon_device *rdev);
|
|
|
|
void radeon_atombios_get_power_modes(struct radeon_device *rdev);
|
2011-04-13 02:49:23 +08:00
|
|
|
void radeon_atom_set_voltage(struct radeon_device *rdev, u16 voltage_level, u8 voltage_type);
|
2011-06-24 00:19:32 +08:00
|
|
|
int radeon_atom_get_max_vddc(struct radeon_device *rdev, u16 *voltage);
|
2010-07-01 00:02:03 +08:00
|
|
|
void rs690_pm_info(struct radeon_device *rdev);
|
2011-02-02 05:12:34 +08:00
|
|
|
extern int rv6xx_get_temp(struct radeon_device *rdev);
|
|
|
|
extern int rv770_get_temp(struct radeon_device *rdev);
|
|
|
|
extern int evergreen_get_temp(struct radeon_device *rdev);
|
|
|
|
extern int sumo_get_temp(struct radeon_device *rdev);
|
2009-09-08 08:10:24 +08:00
|
|
|
|
2009-06-05 20:42:42 +08:00
|
|
|
/*
|
|
|
|
* Fences.
|
|
|
|
*/
|
|
|
|
struct radeon_fence_driver {
|
|
|
|
uint32_t scratch_reg;
|
2011-11-21 04:45:34 +08:00
|
|
|
uint64_t gpu_addr;
|
|
|
|
volatile uint32_t *cpu_addr;
|
2009-06-05 20:42:42 +08:00
|
|
|
atomic_t seq;
|
|
|
|
uint32_t last_seq;
|
2010-03-09 22:45:10 +08:00
|
|
|
unsigned long last_jiffies;
|
|
|
|
unsigned long last_timeout;
|
2009-06-05 20:42:42 +08:00
|
|
|
wait_queue_head_t queue;
|
|
|
|
struct list_head created;
|
2011-10-24 21:05:29 +08:00
|
|
|
struct list_head emitted;
|
2009-06-05 20:42:42 +08:00
|
|
|
struct list_head signaled;
|
2009-12-12 03:36:19 +08:00
|
|
|
bool initialized;
|
2009-06-05 20:42:42 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
struct radeon_fence {
|
|
|
|
struct radeon_device *rdev;
|
|
|
|
struct kref kref;
|
|
|
|
struct list_head list;
|
|
|
|
/* protected by radeon_fence.lock */
|
|
|
|
uint32_t seq;
|
2011-10-24 21:05:29 +08:00
|
|
|
bool emitted;
|
2009-06-05 20:42:42 +08:00
|
|
|
bool signaled;
|
2011-08-26 01:39:48 +08:00
|
|
|
/* RB, DMA, etc. */
|
|
|
|
int ring;
|
2012-01-06 11:11:06 +08:00
|
|
|
struct radeon_semaphore *semaphore;
|
2009-06-05 20:42:42 +08:00
|
|
|
};
|
|
|
|
|
2011-11-21 04:45:34 +08:00
|
|
|
/* Fence driver setup/teardown and per-fence operations. */
int radeon_fence_driver_start_ring(struct radeon_device *rdev, int ring);
int radeon_fence_driver_init(struct radeon_device *rdev);
void radeon_fence_driver_fini(struct radeon_device *rdev);
int radeon_fence_create(struct radeon_device *rdev, struct radeon_fence **fence, int ring);
int radeon_fence_emit(struct radeon_device *rdev, struct radeon_fence *fence);
void radeon_fence_process(struct radeon_device *rdev, int ring);
bool radeon_fence_signaled(struct radeon_fence *fence);
int radeon_fence_wait(struct radeon_fence *fence, bool interruptible);
int radeon_fence_wait_next(struct radeon_device *rdev, int ring);
int radeon_fence_wait_last(struct radeon_device *rdev, int ring);
struct radeon_fence *radeon_fence_ref(struct radeon_fence *fence);
void radeon_fence_unref(struct radeon_fence **fence);
int radeon_fence_count_emitted(struct radeon_device *rdev, int ring);
|
2009-06-05 20:42:42 +08:00
|
|
|
|
2009-06-24 07:48:08 +08:00
|
|
|
/*
 * Tiling registers
 */
struct radeon_surface_reg {
	struct radeon_bo *bo;
};

#define RADEON_GEM_MAX_SURFACES 8
|
2009-06-05 20:42:42 +08:00
|
|
|
|
|
|
|
/*
|
2009-11-20 21:29:23 +08:00
|
|
|
* TTM.
|
2009-06-05 20:42:42 +08:00
|
|
|
*/
|
2009-11-20 21:29:23 +08:00
|
|
|
struct radeon_mman {
|
|
|
|
struct ttm_bo_global_ref bo_global_ref;
|
2010-03-09 08:56:52 +08:00
|
|
|
struct drm_global_reference mem_global_ref;
|
2009-11-20 21:29:23 +08:00
|
|
|
struct ttm_bo_device bdev;
|
2009-12-12 03:36:19 +08:00
|
|
|
bool mem_global_referenced;
|
|
|
|
bool initialized;
|
2009-11-20 21:29:23 +08:00
|
|
|
};
|
|
|
|
|
drm/radeon: GPU virtual memory support v22
Virtual address space are per drm client (opener of /dev/drm).
Client are in charge of virtual address space, they need to
map bo into it by calling DRM_RADEON_GEM_VA ioctl.
First 16M of virtual address space is reserved by the kernel.
Once using 2 level page table we should be able to have a small
vram memory footprint for each pt (there would be one pt for all
gart, one for all vram and then one first level for each virtual
address space).
Plan include using the sub allocator for a common vm page table
area and using memcpy to copy vm page table in & out. Or use
a gart object and copy things in & out using dma.
v2: agd5f fixes:
- Add vram base offset for vram pages. The GPU physical address of a
vram page is FB_OFFSET + page offset. FB_OFFSET is 0 on discrete
cards and the physical bus address of the stolen memory on
integrated chips.
- VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR covers all vmid's >= 1
v3: agd5f:
- integrate with the semaphore/multi-ring stuff
v4:
- rebase on top ttm dma & multi-ring stuff
- userspace is now in charge of the address space
- no more specific cs vm ioctl, instead cs ioctl has a new
chunk
v5:
- properly handle mem == NULL case from move_notify callback
- fix the vm cleanup path
v6:
- fix update of page table to only happen on valid mem placement
v7:
- add tlb flush for each vm context
- add flags to define mapping property (readable, writeable, snooped)
- make ring id implicit from ib->fence->ring, up to each asic callback
to then do ring specific scheduling if vm ib scheduling function
v8:
- add query for ib limit and kernel reserved virtual space
- rename vm->size to max_pfn (maximum number of page)
- update gem_va ioctl to also allow unmap operation
- bump kernel version to allow userspace to query for vm support
v9:
- rebuild page table only when bind and incrementaly depending
on bo referenced by cs and that have been moved
- allow virtual address space to grow
- use sa allocator for vram page table
- return invalid when querying vm limit on non cayman GPU
- dump vm fault register on lockup
v10: agd5f:
- Move the vm schedule_ib callback to a standalone function, remove
the callback and use the existing ib_execute callback for VM IBs.
v11:
- rebase on top of lastest Linus
v12: agd5f:
- remove spurious backslash
- set IB vm_id to 0 in radeon_ib_get()
v13: agd5f:
- fix handling of RADEON_CHUNK_ID_FLAGS
v14:
- fix va destruction
- fix suspend resume
- forbid bo to have several different va in same vm
v15:
- rebase
v16:
- cleanup left over of vm init/fini
v17: agd5f:
- cs checker
v18: agd5f:
- reworks the CS ioctl to better support multiple rings and
VM. Rather than adding a new chunk id for VM, just re-use the
IB chunk id and add a new flags for VM mode. Also define additional
dwords for the flags chunk id to define the what ring we want to use
(gfx, compute, uvd, etc.) and the priority.
v19:
- fix cs fini in weird case of no ib
- semi working flush fix for ni
- rebase on top of sa allocator changes
v20: agd5f:
- further CS ioctl cleanups from Christian's comments
v21: agd5f:
- integrate CS checker improvements
v22: agd5f:
- final cleanups for release, only allow VM CS on cayman
Signed-off-by: Jerome Glisse <jglisse@redhat.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
2012-01-06 11:11:05 +08:00
|
|
|
/* bo virtual address in a specific vm */
|
|
|
|
struct radeon_bo_va {
|
|
|
|
/* bo list is protected by bo being reserved */
|
|
|
|
struct list_head bo_list;
|
|
|
|
/* vm list is protected by vm mutex */
|
|
|
|
struct list_head vm_list;
|
|
|
|
/* constant after initialization */
|
|
|
|
struct radeon_vm *vm;
|
|
|
|
struct radeon_bo *bo;
|
|
|
|
uint64_t soffset;
|
|
|
|
uint64_t eoffset;
|
|
|
|
uint32_t flags;
|
|
|
|
bool valid;
|
|
|
|
};
|
|
|
|
|
2009-11-20 21:29:23 +08:00
|
|
|
struct radeon_bo {
|
|
|
|
/* Protected by gem.mutex */
|
|
|
|
struct list_head list;
|
|
|
|
/* Protected by tbo.reserved */
|
2009-12-07 22:52:58 +08:00
|
|
|
u32 placements[3];
|
|
|
|
struct ttm_placement placement;
|
2009-11-20 21:29:23 +08:00
|
|
|
struct ttm_buffer_object tbo;
|
|
|
|
struct ttm_bo_kmap_obj kmap;
|
|
|
|
unsigned pin_count;
|
|
|
|
void *kptr;
|
|
|
|
u32 tiling_flags;
|
|
|
|
u32 pitch;
|
|
|
|
int surface_reg;
|
drm/radeon: GPU virtual memory support v22
Virtual address space are per drm client (opener of /dev/drm).
Client are in charge of virtual address space, they need to
map bo into it by calling DRM_RADEON_GEM_VA ioctl.
First 16M of virtual address space is reserved by the kernel.
Once using 2 level page table we should be able to have a small
vram memory footprint for each pt (there would be one pt for all
gart, one for all vram and then one first level for each virtual
address space).
Plan include using the sub allocator for a common vm page table
area and using memcpy to copy vm page table in & out. Or use
a gart object and copy things in & out using dma.
v2: agd5f fixes:
- Add vram base offset for vram pages. The GPU physical address of a
vram page is FB_OFFSET + page offset. FB_OFFSET is 0 on discrete
cards and the physical bus address of the stolen memory on
integrated chips.
- VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR covers all vmid's >= 1
v3: agd5f:
- integrate with the semaphore/multi-ring stuff
v4:
- rebase on top ttm dma & multi-ring stuff
- userspace is now in charge of the address space
- no more specific cs vm ioctl, instead cs ioctl has a new
chunk
v5:
- properly handle mem == NULL case from move_notify callback
- fix the vm cleanup path
v6:
- fix update of page table to only happen on valid mem placement
v7:
- add tlb flush for each vm context
- add flags to define mapping property (readable, writeable, snooped)
- make ring id implicit from ib->fence->ring, up to each asic callback
to then do ring specific scheduling if vm ib scheduling function
v8:
- add query for ib limit and kernel reserved virtual space
- rename vm->size to max_pfn (maximum number of page)
- update gem_va ioctl to also allow unmap operation
- bump kernel version to allow userspace to query for vm support
v9:
- rebuild page table only when bind and incrementaly depending
on bo referenced by cs and that have been moved
- allow virtual address space to grow
- use sa allocator for vram page table
- return invalid when querying vm limit on non cayman GPU
- dump vm fault register on lockup
v10: agd5f:
- Move the vm schedule_ib callback to a standalone function, remove
the callback and use the existing ib_execute callback for VM IBs.
v11:
- rebase on top of lastest Linus
v12: agd5f:
- remove spurious backslash
- set IB vm_id to 0 in radeon_ib_get()
v13: agd5f:
- fix handling of RADEON_CHUNK_ID_FLAGS
v14:
- fix va destruction
- fix suspend resume
- forbid bo to have several different va in same vm
v15:
- rebase
v16:
- cleanup left over of vm init/fini
v17: agd5f:
- cs checker
v18: agd5f:
- reworks the CS ioctl to better support multiple rings and
VM. Rather than adding a new chunk id for VM, just re-use the
IB chunk id and add a new flags for VM mode. Also define additional
dwords for the flags chunk id to define the what ring we want to use
(gfx, compute, uvd, etc.) and the priority.
v19:
- fix cs fini in weird case of no ib
- semi working flush fix for ni
- rebase on top of sa allocator changes
v20: agd5f:
- further CS ioctl cleanups from Christian's comments
v21: agd5f:
- integrate CS checker improvements
v22: agd5f:
- final cleanups for release, only allow VM CS on cayman
Signed-off-by: Jerome Glisse <jglisse@redhat.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
2012-01-06 11:11:05 +08:00
|
|
|
/* list of all virtual address to which this bo
|
|
|
|
* is associated to
|
|
|
|
*/
|
|
|
|
struct list_head va;
|
2009-11-20 21:29:23 +08:00
|
|
|
/* Constant after initialization */
|
|
|
|
struct radeon_device *rdev;
|
2011-02-19 00:59:16 +08:00
|
|
|
struct drm_gem_object gem_base;
|
2009-11-20 21:29:23 +08:00
|
|
|
};
|
2011-02-19 00:59:17 +08:00
|
|
|
#define gem_to_radeon_bo(gobj) container_of((gobj), struct radeon_bo, gem_base)
|
2009-06-05 20:42:42 +08:00
|
|
|
|
2009-11-20 21:29:23 +08:00
|
|
|
struct radeon_bo_list {
|
2010-11-17 20:38:32 +08:00
|
|
|
struct ttm_validate_buffer tv;
|
2009-11-20 21:29:23 +08:00
|
|
|
struct radeon_bo *bo;
|
2009-06-05 20:42:42 +08:00
|
|
|
uint64_t gpu_offset;
|
|
|
|
unsigned rdomain;
|
|
|
|
unsigned wdomain;
|
2009-11-20 21:29:23 +08:00
|
|
|
u32 tiling_flags;
|
2009-06-05 20:42:42 +08:00
|
|
|
};
|
|
|
|
|
drm/radeon: introduce a sub allocator and convert ib pool to it v4
Somewhat specializaed sub-allocator designed to perform sub-allocation
for command buffer not only for current cs ioctl but for future command
submission ioctl as well. Patch also convert current ib pool to use
the sub allocator. Idea is that ib poll buffer can be share with other
command buffer submission not having 64K granularity.
v2 Harmonize pool handling and add suspend/resume callback to pin/unpin
sa bo (tested on rv280, rv370, r420, rv515, rv610, rv710, redwood, cayman,
rs480, rs690, rs880)
v3 Simplify allocator
v4 Fix radeon_ib_get error path to properly free fence
Signed-off-by: Jerome Glisse <jglisse@redhat.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
2011-11-16 00:48:34 +08:00
|
|
|
/* sub-allocation manager, it has to be protected by another lock.
|
|
|
|
* By conception this is an helper for other part of the driver
|
|
|
|
* like the indirect buffer or semaphore, which both have their
|
|
|
|
* locking.
|
|
|
|
*
|
|
|
|
* Principe is simple, we keep a list of sub allocation in offset
|
|
|
|
* order (first entry has offset == 0, last entry has the highest
|
|
|
|
* offset).
|
|
|
|
*
|
|
|
|
* When allocating new object we first check if there is room at
|
|
|
|
* the end total_size - (last_object_offset + last_object_size) >=
|
|
|
|
* alloc_size. If so we allocate new object there.
|
|
|
|
*
|
|
|
|
* When there is not enough room at the end, we start waiting for
|
|
|
|
* each sub object until we reach object_offset+object_size >=
|
|
|
|
* alloc_size, this object then become the sub object we return.
|
|
|
|
*
|
|
|
|
* Alignment can't be bigger than page size.
|
|
|
|
*
|
|
|
|
* Hole are not considered for allocation to keep things simple.
|
|
|
|
* Assumption is that there won't be hole (all object on same
|
|
|
|
* alignment).
|
|
|
|
*/
|
|
|
|
struct radeon_sa_manager {
|
|
|
|
struct radeon_bo *bo;
|
|
|
|
struct list_head sa_bo;
|
|
|
|
unsigned size;
|
|
|
|
uint64_t gpu_addr;
|
|
|
|
void *cpu_ptr;
|
|
|
|
uint32_t domain;
|
|
|
|
};
|
|
|
|
|
|
|
|
struct radeon_sa_bo;
|
|
|
|
|
|
|
|
/* sub-allocation buffer */
|
|
|
|
struct radeon_sa_bo {
|
|
|
|
struct list_head list;
|
|
|
|
struct radeon_sa_manager *manager;
|
|
|
|
unsigned offset;
|
|
|
|
unsigned size;
|
|
|
|
};
|
|
|
|
|
2009-06-05 20:42:42 +08:00
|
|
|
/*
|
|
|
|
* GEM objects.
|
|
|
|
*/
|
|
|
|
struct radeon_gem {
|
2009-11-20 21:29:23 +08:00
|
|
|
struct mutex mutex;
|
2009-06-05 20:42:42 +08:00
|
|
|
struct list_head objects;
|
|
|
|
};
|
|
|
|
|
|
|
|
/* GEM setup/teardown and object helpers. */
int radeon_gem_init(struct radeon_device *rdev);
void radeon_gem_fini(struct radeon_device *rdev);
int radeon_gem_object_create(struct radeon_device *rdev, int size,
				int alignment, int initial_domain,
				bool discardable, bool kernel,
				struct drm_gem_object **obj);
int radeon_gem_object_pin(struct drm_gem_object *obj, uint32_t pin_domain,
			  uint64_t *gpu_addr);
void radeon_gem_object_unpin(struct drm_gem_object *obj);

/* Dumb-buffer entry points. */
int radeon_mode_dumb_create(struct drm_file *file_priv,
			    struct drm_device *dev,
			    struct drm_mode_create_dumb *args);
int radeon_mode_dumb_mmap(struct drm_file *filp,
			  struct drm_device *dev,
			  uint32_t handle, uint64_t *offset_p);
int radeon_mode_dumb_destroy(struct drm_file *file_priv,
			     struct drm_device *dev,
			     uint32_t handle);
|
2009-06-05 20:42:42 +08:00
|
|
|
|
2011-12-22 01:13:47 +08:00
|
|
|
/*
|
|
|
|
* Semaphores.
|
|
|
|
*/
|
|
|
|
struct radeon_ring;
|
|
|
|
|
|
|
|
#define RADEON_SEMAPHORE_BO_SIZE 256
|
|
|
|
|
|
|
|
struct radeon_semaphore_driver {
|
|
|
|
rwlock_t lock;
|
|
|
|
struct list_head bo;
|
|
|
|
};
|
|
|
|
|
|
|
|
struct radeon_semaphore_bo;
|
|
|
|
|
|
|
|
/* everything here is constant */
|
|
|
|
struct radeon_semaphore {
|
|
|
|
struct list_head list;
|
|
|
|
uint64_t gpu_addr;
|
|
|
|
uint32_t *cpu_ptr;
|
|
|
|
struct radeon_semaphore_bo *bo;
|
|
|
|
};
|
|
|
|
|
|
|
|
struct radeon_semaphore_bo {
|
|
|
|
struct list_head list;
|
|
|
|
struct radeon_ib *ib;
|
|
|
|
struct list_head free;
|
|
|
|
struct radeon_semaphore semaphores[RADEON_SEMAPHORE_BO_SIZE/8];
|
|
|
|
unsigned nused;
|
|
|
|
};
|
|
|
|
|
|
|
|
/* Semaphore driver teardown and per-semaphore operations. */
void radeon_semaphore_driver_fini(struct radeon_device *rdev);
int radeon_semaphore_create(struct radeon_device *rdev,
			    struct radeon_semaphore **semaphore);
void radeon_semaphore_emit_signal(struct radeon_device *rdev, int ring,
				  struct radeon_semaphore *semaphore);
void radeon_semaphore_emit_wait(struct radeon_device *rdev, int ring,
				struct radeon_semaphore *semaphore);
void radeon_semaphore_free(struct radeon_device *rdev,
			   struct radeon_semaphore *semaphore);
|
|
|
|
|
2009-06-05 20:42:42 +08:00
|
|
|
/*
 * GART structures, functions & helpers
 */
struct radeon_mc;

#define RADEON_GPU_PAGE_SIZE 4096
|
drm/radeon/kms: simplify memory controller setup V2
Get rid of _location and use _start/_end also simplify the
computation of vram_start|end & gtt_start|end. For R1XX-R2XX
we place VRAM at the same address of PCI aperture, those GPU
shouldn't have much memory and seems to behave better when
setup that way. For R3XX and newer we place VRAM at 0. For
R6XX-R7XX AGP we place VRAM before or after AGP aperture this
might limit to limit the VRAM size but it's very unlikely.
For IGP we don't change the VRAM placement.
Tested on (compiz,quake3,suspend/resume):
PCI/PCIE:RV280,R420,RV515,RV570,RV610,RV710
AGP:RV100,RV280,R420,RV350,RV620(RPB*),RV730
IGP:RS480(RPB*),RS690,RS780(RPB*),RS880
RPB: resume previously broken
V2 correct commit message to reflect more accurately the bug
and move VRAM placement to 0 for most of the GPU to avoid
limiting VRAM.
Signed-off-by: Jerome Glisse <jglisse@redhat.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
2010-02-18 05:54:29 +08:00
|
|
|
#define RADEON_GPU_PAGE_MASK (RADEON_GPU_PAGE_SIZE - 1)
|
2011-09-17 00:04:08 +08:00
|
|
|
/* log2 of RADEON_GPU_PAGE_SIZE (1 << 12 == 4096); the two definitions must be kept in sync by hand. */
#define RADEON_GPU_PAGE_SHIFT 12
|
drm/radeon: GPU virtual memory support v22
Virtual address space are per drm client (opener of /dev/drm).
Client are in charge of virtual address space, they need to
map bo into it by calling DRM_RADEON_GEM_VA ioctl.
First 16M of virtual address space is reserved by the kernel.
Once using 2 level page table we should be able to have a small
vram memory footprint for each pt (there would be one pt for all
gart, one for all vram and then one first level for each virtual
address space).
Plan include using the sub allocator for a common vm page table
area and using memcpy to copy vm page table in & out. Or use
a gart object and copy things in & out using dma.
v2: agd5f fixes:
- Add vram base offset for vram pages. The GPU physical address of a
vram page is FB_OFFSET + page offset. FB_OFFSET is 0 on discrete
cards and the physical bus address of the stolen memory on
integrated chips.
- VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR covers all vmid's >= 1
v3: agd5f:
- integrate with the semaphore/multi-ring stuff
v4:
- rebase on top ttm dma & multi-ring stuff
- userspace is now in charge of the address space
- no more specific cs vm ioctl, instead cs ioctl has a new
chunk
v5:
- properly handle mem == NULL case from move_notify callback
- fix the vm cleanup path
v6:
- fix update of page table to only happen on valid mem placement
v7:
- add tlb flush for each vm context
- add flags to define mapping property (readable, writeable, snooped)
- make ring id implicit from ib->fence->ring, up to each asic callback
to then do ring specific scheduling if vm ib scheduling function
v8:
- add query for ib limit and kernel reserved virtual space
- rename vm->size to max_pfn (maximum number of page)
- update gem_va ioctl to also allow unmap operation
- bump kernel version to allow userspace to query for vm support
v9:
- rebuild page table only when bind and incrementaly depending
on bo referenced by cs and that have been moved
- allow virtual address space to grow
- use sa allocator for vram page table
- return invalid when querying vm limit on non cayman GPU
- dump vm fault register on lockup
v10: agd5f:
- Move the vm schedule_ib callback to a standalone function, remove
the callback and use the existing ib_execute callback for VM IBs.
v11:
- rebase on top of lastest Linus
v12: agd5f:
- remove spurious backslash
- set IB vm_id to 0 in radeon_ib_get()
v13: agd5f:
- fix handling of RADEON_CHUNK_ID_FLAGS
v14:
- fix va destruction
- fix suspend resume
- forbid bo to have several different va in same vm
v15:
- rebase
v16:
- cleanup left over of vm init/fini
v17: agd5f:
- cs checker
v18: agd5f:
- reworks the CS ioctl to better support multiple rings and
VM. Rather than adding a new chunk id for VM, just re-use the
IB chunk id and add a new flags for VM mode. Also define additional
dwords for the flags chunk id to define the what ring we want to use
(gfx, compute, uvd, etc.) and the priority.
v19:
- fix cs fini in weird case of no ib
- semi working flush fix for ni
- rebase on top of sa allocator changes
v20: agd5f:
- further CS ioctl cleanups from Christian's comments
v21: agd5f:
- integrate CS checker improvements
v22: agd5f:
- final cleanups for release, only allow VM CS on cayman
Signed-off-by: Jerome Glisse <jglisse@redhat.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
2012-01-06 11:11:05 +08:00
|
|
|
/* Round a up to the next multiple of RADEON_GPU_PAGE_SIZE; already aligned values are returned unchanged. a is evaluated once. */
#define RADEON_GPU_PAGE_ALIGN(a) (((a) + RADEON_GPU_PAGE_MASK) & ~RADEON_GPU_PAGE_MASK)
|
2009-10-14 12:34:41 +08:00
|
|
|
|
2009-06-05 20:42:42 +08:00
|
|
|
/*
 * State of the GART page table.
 * NOTE(review): the member descriptions are inferred from the names and from
 * the radeon_gart_* prototypes declared after this struct - confirm against
 * the implementation.
 *   table_addr        presumably the DMA address of the table
 *   robj, ptr         presumably the buffer object backing the table and a
 *                     CPU pointer to it
 *   num_gpu_pages,
 *   num_cpu_pages     page counts at GPU and CPU page granularity
 *   table_size        presumably the size of the table in bytes
 *   pages, pages_addr per-page arrays; radeon_gart_bind() takes a pair of
 *                     the same types (struct page **, dma_addr_t *)
 *   ready             readiness flag
 */
struct radeon_gart {
|
|
|
|
dma_addr_t table_addr;
|
2011-11-03 23:16:49 +08:00
|
|
|
struct radeon_bo *robj;
|
|
|
|
void *ptr;
|
2009-06-05 20:42:42 +08:00
|
|
|
unsigned num_gpu_pages;
|
|
|
|
unsigned num_cpu_pages;
|
|
|
|
unsigned table_size;
|
|
|
|
struct page **pages;
|
|
|
|
dma_addr_t *pages_addr;
|
|
|
|
bool ready;
|
|
|
|
};
|
|
|
|
|
|
|
|
int radeon_gart_table_ram_alloc(struct radeon_device *rdev);
|
|
|
|
void radeon_gart_table_ram_free(struct radeon_device *rdev);
|
|
|
|
int radeon_gart_table_vram_alloc(struct radeon_device *rdev);
|
|
|
|
void radeon_gart_table_vram_free(struct radeon_device *rdev);
|
2011-11-03 23:16:49 +08:00
|
|
|
int radeon_gart_table_vram_pin(struct radeon_device *rdev);
|
|
|
|
void radeon_gart_table_vram_unpin(struct radeon_device *rdev);
|
2009-06-05 20:42:42 +08:00
|
|
|
int radeon_gart_init(struct radeon_device *rdev);
|
|
|
|
void radeon_gart_fini(struct radeon_device *rdev);
|
|
|
|
void radeon_gart_unbind(struct radeon_device *rdev, unsigned offset,
|
|
|
|
int pages);
|
|
|
|
int radeon_gart_bind(struct radeon_device *rdev, unsigned offset,
|
2010-12-03 00:04:29 +08:00
|
|
|
int pages, struct page **pagelist,
|
|
|
|
dma_addr_t *dma_addr);
|
2011-11-03 23:16:49 +08:00
|
|
|
void radeon_gart_restore(struct radeon_device *rdev);
|
2009-06-05 20:42:42 +08:00
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
* GPU MC structures, functions & helpers
|
|
|
|
*/
|
|
|
|
/*
 * Memory controller layout: aperture, VRAM and GTT sizes and ranges.
 * VRAM and GTT are each described by a start/end pair (vram_start/vram_end,
 * gtt_start/gtt_end) plus sizes; three VRAM sizes are kept separately
 * (mc_vram_size, visible_vram_size, real_vram_size).
 * NOTE(review): units and the exact relationship between the three VRAM
 * sizes are not visible in this header - confirm against the per-ASIC
 * mc_init code.
 */
struct radeon_mc {
|
|
|
|
resource_size_t aper_size;
|
|
|
|
resource_size_t aper_base;
|
|
|
|
resource_size_t agp_base;
|
2009-07-21 18:39:30 +08:00
|
|
|
/* for some chips with <= 32MB we need to lie
|
|
|
|
* about vram size near mc fb location */
|
2009-09-08 08:10:24 +08:00
|
|
|
u64 mc_vram_size;
|
drm/radeon/kms: simplify memory controller setup V2
Get rid of _location and use _start/_end also simplify the
computation of vram_start|end & gtt_start|end. For R1XX-R2XX
we place VRAM at the same address of PCI aperture, those GPU
shouldn't have much memory and seems to behave better when
setup that way. For R3XX and newer we place VRAM at 0. For
R6XX-R7XX AGP we place VRAM before or after AGP aperture this
might limit to limit the VRAM size but it's very unlikely.
For IGP we don't change the VRAM placement.
Tested on (compiz,quake3,suspend/resume):
PCI/PCIE:RV280,R420,RV515,RV570,RV610,RV710
AGP:RV100,RV280,R420,RV350,RV620(RPB*),RV730
IGP:RS480(RPB*),RS690,RS780(RPB*),RS880
RPB: resume previously broken
V2 correct commit message to reflect more accurately the bug
and move VRAM placement to 0 for most of the GPU to avoid
limiting VRAM.
Signed-off-by: Jerome Glisse <jglisse@redhat.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
2010-02-18 05:54:29 +08:00
|
|
|
u64 visible_vram_size;
|
2009-09-08 08:10:24 +08:00
|
|
|
u64 gtt_size;
|
|
|
|
u64 gtt_start;
|
|
|
|
u64 gtt_end;
|
|
|
|
u64 vram_start;
|
|
|
|
u64 vram_end;
|
2009-06-05 20:42:42 +08:00
|
|
|
unsigned vram_width;
|
2009-09-08 08:10:24 +08:00
|
|
|
u64 real_vram_size;
|
2009-06-05 20:42:42 +08:00
|
|
|
int vram_mtrr;
|
|
|
|
bool vram_is_ddr;
|
drm/radeon/kms: simplify memory controller setup V2
Get rid of _location and use _start/_end also simplify the
computation of vram_start|end & gtt_start|end. For R1XX-R2XX
we place VRAM at the same address of PCI aperture, those GPU
shouldn't have much memory and seems to behave better when
setup that way. For R3XX and newer we place VRAM at 0. For
R6XX-R7XX AGP we place VRAM before or after AGP aperture this
might limit to limit the VRAM size but it's very unlikely.
For IGP we don't change the VRAM placement.
Tested on (compiz,quake3,suspend/resume):
PCI/PCIE:RV280,R420,RV515,RV570,RV610,RV710
AGP:RV100,RV280,R420,RV350,RV620(RPB*),RV730
IGP:RS480(RPB*),RS690,RS780(RPB*),RS880
RPB: resume previously broken
V2 correct commit message to reflect more accurately the bug
and move VRAM placement to 0 for most of the GPU to avoid
limiting VRAM.
Signed-off-by: Jerome Glisse <jglisse@redhat.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
2010-02-18 05:54:29 +08:00
|
|
|
bool igp_sideport_enabled;
|
2010-07-15 22:51:10 +08:00
|
|
|
u64 gtt_base_align;
|
2009-06-05 20:42:42 +08:00
|
|
|
};
|
|
|
|
|
2010-01-06 00:27:29 +08:00
|
|
|
bool radeon_combios_sideport_present(struct radeon_device *rdev);
|
|
|
|
bool radeon_atombios_sideport_present(struct radeon_device *rdev);
|
2009-06-05 20:42:42 +08:00
|
|
|
|
|
|
|
/*
|
|
|
|
* GPU scratch registers structures, functions & helpers
|
|
|
|
*/
|
|
|
|
/*
 * Bookkeeping for GPU scratch registers; radeon_scratch_get() and
 * radeon_scratch_free(), declared after this struct, hand them out and take
 * them back.
 * free[] and reg[] are parallel arrays with room for 32 registers.
 * NOTE(review): num_reg is presumably the number of entries actually used
 * and reg_base presumably the offset of the first register - confirm
 * against the implementation.
 */
struct radeon_scratch {
|
|
|
|
unsigned num_reg;
|
2010-08-28 06:25:25 +08:00
|
|
|
uint32_t reg_base;
|
2009-06-05 20:42:42 +08:00
|
|
|
bool free[32];
|
|
|
|
uint32_t reg[32];
|
|
|
|
};
|
|
|
|
|
|
|
|
int radeon_scratch_get(struct radeon_device *rdev, uint32_t *reg);
|
|
|
|
void radeon_scratch_free(struct radeon_device *rdev, uint32_t reg);
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
* IRQS.
|
|
|
|
*/
|
2010-11-21 23:59:01 +08:00
|
|
|
|
|
|
|
/*
 * Work item (it embeds a struct work_struct) bundling a device, a CRTC id,
 * a fence, a pending vblank event, a buffer object and a new CRTC base
 * address.
 * NOTE(review): presumably the per-page-flip context whose job is to unpin
 * old_rbo once the flip to new_crtc_base has completed - inferred from the
 * names, confirm against the page flip code.
 */
struct radeon_unpin_work {
|
|
|
|
struct work_struct work;
|
|
|
|
struct radeon_device *rdev;
|
|
|
|
int crtc_id;
|
|
|
|
struct radeon_fence *fence;
|
|
|
|
struct drm_pending_vblank_event *event;
|
|
|
|
struct radeon_bo *old_rbo;
|
|
|
|
u64 new_crtc_base;
|
|
|
|
};
|
|
|
|
|
|
|
|
/*
 * r500 member of union radeon_irq_stat_regs: a single 32-bit display
 * interrupt status word.
 * NOTE(review): presumably a copy of the hardware status register taken by
 * the interrupt handler - not visible here.
 */
struct r500_irq_stat_regs {
|
|
|
|
u32 disp_int;
|
|
|
|
};
|
|
|
|
|
|
|
|
/*
 * r600 member of union radeon_irq_stat_regs: three display interrupt status
 * words (disp_int, disp_int_cont, disp_int_cont2) and one graphics
 * interrupt word for each of two display controllers (d1grph_int,
 * d2grph_int).
 * NOTE(review): presumably copies of hardware status registers - confirm
 * against the r600 interrupt handler.
 */
struct r600_irq_stat_regs {
|
|
|
|
u32 disp_int;
|
|
|
|
u32 disp_int_cont;
|
|
|
|
u32 disp_int_cont2;
|
|
|
|
u32 d1grph_int;
|
|
|
|
u32 d2grph_int;
|
|
|
|
};
|
|
|
|
|
|
|
|
/*
 * evergreen member of union radeon_irq_stat_regs: six display interrupt
 * status words (disp_int, disp_int_cont .. disp_int_cont5) and six graphics
 * interrupt words (d1grph_int .. d6grph_int). Six matches RADEON_MAX_CRTCS.
 * NOTE(review): presumably one pair per display controller, copied from
 * hardware status registers - confirm against the evergreen interrupt
 * handler.
 */
struct evergreen_irq_stat_regs {
|
|
|
|
u32 disp_int;
|
|
|
|
u32 disp_int_cont;
|
|
|
|
u32 disp_int_cont2;
|
|
|
|
u32 disp_int_cont3;
|
|
|
|
u32 disp_int_cont4;
|
|
|
|
u32 disp_int_cont5;
|
|
|
|
u32 d1grph_int;
|
|
|
|
u32 d2grph_int;
|
|
|
|
u32 d3grph_int;
|
|
|
|
u32 d4grph_int;
|
|
|
|
u32 d5grph_int;
|
|
|
|
u32 d6grph_int;
|
|
|
|
};
|
|
|
|
|
|
|
|
/*
 * Overlay of the per-family interrupt status layouts; struct radeon_irq
 * embeds one instance as stat_regs. Because this is a union the members
 * share storage, so only one layout can be in use at a time (presumably the
 * one matching the ASIC family of the device).
 */
union radeon_irq_stat_regs {
|
|
|
|
struct r500_irq_stat_regs r500;
|
|
|
|
struct r600_irq_stat_regs r600;
|
|
|
|
struct evergreen_irq_stat_regs evergreen;
|
|
|
|
};
|
|
|
|
|
2011-10-27 03:43:58 +08:00
|
|
|
/* Array bounds for the per-HPD-pin, per-CRTC and per-HDMI-block members of struct radeon_irq. */
#define RADEON_MAX_HPD_PINS 6
|
|
|
|
#define RADEON_MAX_CRTCS 6
|
|
|
|
#define RADEON_MAX_HDMI_BLOCKS 2
|
|
|
|
|
2009-06-05 20:42:42 +08:00
|
|
|
/*
 * Interrupt bookkeeping for one device.
 *   sw_int[], sw_refcount[]   one slot per ring (RADEON_NUM_RINGS)
 *   crtc_vblank_int[], pflip[], pflip_lock[], pflip_refcount[]
 *                             one slot per CRTC (RADEON_MAX_CRTCS)
 *   hpd[]                     one slot per hotplug pin (RADEON_MAX_HPD_PINS)
 *   hdmi[]                    one slot per HDMI block
 *                             (RADEON_MAX_HDMI_BLOCKS)
 *   stat_regs                 per-family interrupt status words
 * NOTE(review): the bool members presumably record which interrupt sources
 * are requested, and the refcounts are presumably maintained by
 * radeon_irq_kms_sw_irq_get/put and radeon_irq_kms_pflip_irq_get/put under
 * sw_lock and pflip_lock - confirm against the implementation.
 */
struct radeon_irq {
|
|
|
|
bool installed;
|
2011-11-18 09:13:28 +08:00
|
|
|
bool sw_int[RADEON_NUM_RINGS];
|
2011-10-27 03:43:58 +08:00
|
|
|
bool crtc_vblank_int[RADEON_MAX_CRTCS];
|
|
|
|
bool pflip[RADEON_MAX_CRTCS];
|
2010-01-08 07:22:47 +08:00
|
|
|
wait_queue_head_t vblank_queue;
|
2011-10-27 03:43:58 +08:00
|
|
|
bool hpd[RADEON_MAX_HPD_PINS];
|
2010-04-23 00:52:11 +08:00
|
|
|
bool gui_idle;
|
|
|
|
bool gui_idle_acked;
|
|
|
|
wait_queue_head_t idle_queue;
|
2011-10-27 03:43:58 +08:00
|
|
|
bool hdmi[RADEON_MAX_HDMI_BLOCKS];
|
2009-12-01 14:04:56 +08:00
|
|
|
spinlock_t sw_lock;
|
2011-11-18 09:13:28 +08:00
|
|
|
int sw_refcount[RADEON_NUM_RINGS];
|
2010-11-21 23:59:01 +08:00
|
|
|
union radeon_irq_stat_regs stat_regs;
|
2011-10-27 03:43:58 +08:00
|
|
|
spinlock_t pflip_lock[RADEON_MAX_CRTCS];
|
|
|
|
int pflip_refcount[RADEON_MAX_CRTCS];
|
2009-06-05 20:42:42 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
int radeon_irq_kms_init(struct radeon_device *rdev);
|
|
|
|
void radeon_irq_kms_fini(struct radeon_device *rdev);
|
2011-11-18 09:13:28 +08:00
|
|
|
void radeon_irq_kms_sw_irq_get(struct radeon_device *rdev, int ring);
|
|
|
|
void radeon_irq_kms_sw_irq_put(struct radeon_device *rdev, int ring);
|
2010-11-21 23:59:01 +08:00
|
|
|
void radeon_irq_kms_pflip_irq_get(struct radeon_device *rdev, int crtc);
|
|
|
|
void radeon_irq_kms_pflip_irq_put(struct radeon_device *rdev, int crtc);
|
2009-06-05 20:42:42 +08:00
|
|
|
|
|
|
|
/*
|
2011-10-23 18:56:27 +08:00
|
|
|
* CP & rings.
|
2009-06-05 20:42:42 +08:00
|
|
|
*/
|
2011-08-26 01:39:48 +08:00
|
|
|
|
2009-06-05 20:42:42 +08:00
|
|
|
/*
 * One indirect buffer (IB). struct radeon_ib_pool holds an array of these
 * (ibs[]) and struct r600_blit points at one (vb_ib).
 * NOTE(review): member meanings are inferred from names - sa_bo is
 * presumably the sub-allocation backing the buffer, idx the slot in the
 * pool, length_dw the used length in dwords, gpu_addr/ptr the GPU address
 * and CPU mapping, fence the fence signalled when the IB has executed and
 * vm_id the VM context it runs in. Confirm against radeon_ib_get() and
 * radeon_ib_schedule().
 */
struct radeon_ib {
|
drm/radeon: introduce a sub allocator and convert ib pool to it v4
Somewhat specializaed sub-allocator designed to perform sub-allocation
for command buffer not only for current cs ioctl but for future command
submission ioctl as well. Patch also convert current ib pool to use
the sub allocator. Idea is that ib poll buffer can be share with other
command buffer submission not having 64K granularity.
v2 Harmonize pool handling and add suspend/resume callback to pin/unpin
sa bo (tested on rv280, rv370, r420, rv515, rv610, rv710, redwood, cayman,
rs480, rs690, rs880)
v3 Simplify allocator
v4 Fix radeon_ib_get error path to properly free fence
Signed-off-by: Jerome Glisse <jglisse@redhat.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
2011-11-16 00:48:34 +08:00
|
|
|
struct radeon_sa_bo sa_bo;
|
2010-02-16 04:36:13 +08:00
|
|
|
unsigned idx;
|
drm/radeon: introduce a sub allocator and convert ib pool to it v4
Somewhat specializaed sub-allocator designed to perform sub-allocation
for command buffer not only for current cs ioctl but for future command
submission ioctl as well. Patch also convert current ib pool to use
the sub allocator. Idea is that ib poll buffer can be share with other
command buffer submission not having 64K granularity.
v2 Harmonize pool handling and add suspend/resume callback to pin/unpin
sa bo (tested on rv280, rv370, r420, rv515, rv610, rv710, redwood, cayman,
rs480, rs690, rs880)
v3 Simplify allocator
v4 Fix radeon_ib_get error path to properly free fence
Signed-off-by: Jerome Glisse <jglisse@redhat.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
2011-11-16 00:48:34 +08:00
|
|
|
uint32_t length_dw;
|
2009-06-05 20:42:42 +08:00
|
|
|
uint64_t gpu_addr;
|
2010-02-16 04:36:13 +08:00
|
|
|
uint32_t *ptr;
|
drm/radeon: introduce a sub allocator and convert ib pool to it v4
Somewhat specializaed sub-allocator designed to perform sub-allocation
for command buffer not only for current cs ioctl but for future command
submission ioctl as well. Patch also convert current ib pool to use
the sub allocator. Idea is that ib poll buffer can be share with other
command buffer submission not having 64K granularity.
v2 Harmonize pool handling and add suspend/resume callback to pin/unpin
sa bo (tested on rv280, rv370, r420, rv515, rv610, rv710, redwood, cayman,
rs480, rs690, rs880)
v3 Simplify allocator
v4 Fix radeon_ib_get error path to properly free fence
Signed-off-by: Jerome Glisse <jglisse@redhat.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
2011-11-16 00:48:34 +08:00
|
|
|
struct radeon_fence *fence;
|
drm/radeon: GPU virtual memory support v22
Virtual address space are per drm client (opener of /dev/drm).
Client are in charge of virtual address space, they need to
map bo into it by calling DRM_RADEON_GEM_VA ioctl.
First 16M of virtual address space is reserved by the kernel.
Once using 2 level page table we should be able to have a small
vram memory footprint for each pt (there would be one pt for all
gart, one for all vram and then one first level for each virtual
address space).
Plan include using the sub allocator for a common vm page table
area and using memcpy to copy vm page table in & out. Or use
a gart object and copy things in & out using dma.
v2: agd5f fixes:
- Add vram base offset for vram pages. The GPU physical address of a
vram page is FB_OFFSET + page offset. FB_OFFSET is 0 on discrete
cards and the physical bus address of the stolen memory on
integrated chips.
- VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR covers all vmid's >= 1
v3: agd5f:
- integrate with the semaphore/multi-ring stuff
v4:
- rebase on top ttm dma & multi-ring stuff
- userspace is now in charge of the address space
- no more specific cs vm ioctl, instead cs ioctl has a new
chunk
v5:
- properly handle mem == NULL case from move_notify callback
- fix the vm cleanup path
v6:
- fix update of page table to only happen on valid mem placement
v7:
- add tlb flush for each vm context
- add flags to define mapping property (readable, writeable, snooped)
- make ring id implicit from ib->fence->ring, up to each asic callback
to then do ring specific scheduling if vm ib scheduling function
v8:
- add query for ib limit and kernel reserved virtual space
- rename vm->size to max_pfn (maximum number of page)
- update gem_va ioctl to also allow unmap operation
- bump kernel version to allow userspace to query for vm support
v9:
- rebuild page table only when bind and incrementaly depending
on bo referenced by cs and that have been moved
- allow virtual address space to grow
- use sa allocator for vram page table
- return invalid when querying vm limit on non cayman GPU
- dump vm fault register on lockup
v10: agd5f:
- Move the vm schedule_ib callback to a standalone function, remove
the callback and use the existing ib_execute callback for VM IBs.
v11:
- rebase on top of lastest Linus
v12: agd5f:
- remove spurious backslash
- set IB vm_id to 0 in radeon_ib_get()
v13: agd5f:
- fix handling of RADEON_CHUNK_ID_FLAGS
v14:
- fix va destruction
- fix suspend resume
- forbid bo to have several different va in same vm
v15:
- rebase
v16:
- cleanup left over of vm init/fini
v17: agd5f:
- cs checker
v18: agd5f:
- reworks the CS ioctl to better support multiple rings and
VM. Rather than adding a new chunk id for VM, just re-use the
IB chunk id and add a new flags for VM mode. Also define additional
dwords for the flags chunk id to define the what ring we want to use
(gfx, compute, uvd, etc.) and the priority.
v19:
- fix cs fini in weird case of no ib
- semi working flush fix for ni
- rebase on top of sa allocator changes
v20: agd5f:
- further CS ioctl cleanups from Christian's comments
v21: agd5f:
- integrate CS checker improvements
v22: agd5f:
- final cleanups for release, only allow VM CS on cayman
Signed-off-by: Jerome Glisse <jglisse@redhat.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
2012-01-06 11:11:05 +08:00
|
|
|
unsigned vm_id;
|
2009-06-05 20:42:42 +08:00
|
|
|
};
|
|
|
|
|
2009-09-15 09:12:56 +08:00
|
|
|
/*
|
|
|
|
* locking -
|
|
|
|
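* mutex protects scheduled_ibs, ready, alloc_bm
* NOTE(review): scheduled_ibs and alloc_bm are not members of the
* struct radeon_ib_pool declared below; this list looks stale.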
|
|
|
|
*/
|
2009-06-05 20:42:42 +08:00
|
|
|
/*
 * Pool of indirect buffers: a fixed array of RADEON_IB_POOL_SIZE
 * struct radeon_ib entries together with a sub-allocator manager.
 * NOTE(review): head_id is presumably the index at which the next search
 * for a free IB starts, and ready presumably says whether the pool has been
 * initialised - confirm against radeon_ib_pool_init() and radeon_ib_get().
 */
struct radeon_ib_pool {
|
2012-01-24 00:52:15 +08:00
|
|
|
struct radeon_mutex mutex;
|
drm/radeon: introduce a sub allocator and convert ib pool to it v4
Somewhat specializaed sub-allocator designed to perform sub-allocation
for command buffer not only for current cs ioctl but for future command
submission ioctl as well. Patch also convert current ib pool to use
the sub allocator. Idea is that ib poll buffer can be share with other
command buffer submission not having 64K granularity.
v2 Harmonize pool handling and add suspend/resume callback to pin/unpin
sa bo (tested on rv280, rv370, r420, rv515, rv610, rv710, redwood, cayman,
rs480, rs690, rs880)
v3 Simplify allocator
v4 Fix radeon_ib_get error path to properly free fence
Signed-off-by: Jerome Glisse <jglisse@redhat.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
2011-11-16 00:48:34 +08:00
|
|
|
struct radeon_sa_manager sa_manager;
|
|
|
|
struct radeon_ib ibs[RADEON_IB_POOL_SIZE];
|
|
|
|
bool ready;
|
|
|
|
unsigned head_id;
|
2009-06-05 20:42:42 +08:00
|
|
|
};
|
|
|
|
|
2011-10-23 18:56:27 +08:00
|
|
|
/*
 * State of one command ring.
 * radeon_ring_init(), declared later in this header, takes parameters named
 * ring_size, rptr_offs, rptr_reg, wptr_reg, ptr_reg_shift, ptr_reg_mask and
 * nop, matching the members of the same names here.
 * NOTE(review): the rest is inferred from names - ring_obj/ring/gpu_addr
 * are presumably the backing buffer object, its CPU mapping and its GPU
 * address; rptr/wptr the read and write positions; ring_free_dw and
 * count_dw dword counts; align_mask and ptr_mask masks applied to the write
 * pointer. Confirm against the ring implementation.
 */
struct radeon_ring {
|
2009-11-20 21:29:23 +08:00
|
|
|
struct radeon_bo *ring_obj;
|
2009-06-05 20:42:42 +08:00
|
|
|
volatile uint32_t *ring;
|
|
|
|
unsigned rptr;
|
2011-10-13 18:48:45 +08:00
|
|
|
unsigned rptr_offs;
|
|
|
|
unsigned rptr_reg;
|
2009-06-05 20:42:42 +08:00
|
|
|
unsigned wptr;
|
|
|
|
unsigned wptr_old;
|
2011-10-13 18:48:45 +08:00
|
|
|
unsigned wptr_reg;
|
2009-06-05 20:42:42 +08:00
|
|
|
unsigned ring_size;
|
|
|
|
unsigned ring_free_dw;
|
|
|
|
int count_dw;
|
|
|
|
uint64_t gpu_addr;
|
|
|
|
uint32_t align_mask;
|
|
|
|
uint32_t ptr_mask;
|
|
|
|
struct mutex mutex;
|
|
|
|
bool ready;
|
2011-11-18 03:25:56 +08:00
|
|
|
u32 ptr_reg_shift;
|
|
|
|
u32 ptr_reg_mask;
|
|
|
|
u32 nop;
|
2009-06-05 20:42:42 +08:00
|
|
|
};
|
|
|
|
|
drm/radeon: GPU virtual memory support v22
Virtual address space are per drm client (opener of /dev/drm).
Client are in charge of virtual address space, they need to
map bo into it by calling DRM_RADEON_GEM_VA ioctl.
First 16M of virtual address space is reserved by the kernel.
Once using 2 level page table we should be able to have a small
vram memory footprint for each pt (there would be one pt for all
gart, one for all vram and then one first level for each virtual
address space).
Plan include using the sub allocator for a common vm page table
area and using memcpy to copy vm page table in & out. Or use
a gart object and copy things in & out using dma.
v2: agd5f fixes:
- Add vram base offset for vram pages. The GPU physical address of a
vram page is FB_OFFSET + page offset. FB_OFFSET is 0 on discrete
cards and the physical bus address of the stolen memory on
integrated chips.
- VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR covers all vmid's >= 1
v3: agd5f:
- integrate with the semaphore/multi-ring stuff
v4:
- rebase on top ttm dma & multi-ring stuff
- userspace is now in charge of the address space
- no more specific cs vm ioctl, instead cs ioctl has a new
chunk
v5:
- properly handle mem == NULL case from move_notify callback
- fix the vm cleanup path
v6:
- fix update of page table to only happen on valid mem placement
v7:
- add tlb flush for each vm context
- add flags to define mapping property (readable, writeable, snooped)
- make ring id implicit from ib->fence->ring, up to each asic callback
to then do ring specific scheduling if vm ib scheduling function
v8:
- add query for ib limit and kernel reserved virtual space
- rename vm->size to max_pfn (maximum number of page)
- update gem_va ioctl to also allow unmap operation
- bump kernel version to allow userspace to query for vm support
v9:
- rebuild page table only when bind and incrementaly depending
on bo referenced by cs and that have been moved
- allow virtual address space to grow
- use sa allocator for vram page table
- return invalid when querying vm limit on non cayman GPU
- dump vm fault register on lockup
v10: agd5f:
- Move the vm schedule_ib callback to a standalone function, remove
the callback and use the existing ib_execute callback for VM IBs.
v11:
- rebase on top of lastest Linus
v12: agd5f:
- remove spurious backslash
- set IB vm_id to 0 in radeon_ib_get()
v13: agd5f:
- fix handling of RADEON_CHUNK_ID_FLAGS
v14:
- fix va destruction
- fix suspend resume
- forbid bo to have several different va in same vm
v15:
- rebase
v16:
- cleanup left over of vm init/fini
v17: agd5f:
- cs checker
v18: agd5f:
- reworks the CS ioctl to better support multiple rings and
VM. Rather than adding a new chunk id for VM, just re-use the
IB chunk id and add a new flags for VM mode. Also define additional
dwords for the flags chunk id to define the what ring we want to use
(gfx, compute, uvd, etc.) and the priority.
v19:
- fix cs fini in weird case of no ib
- semi working flush fix for ni
- rebase on top of sa allocator changes
v20: agd5f:
- further CS ioctl cleanups from Christian's comments
v21: agd5f:
- integrate CS checker improvements
v22: agd5f:
- final cleanups for release, only allow VM CS on cayman
Signed-off-by: Jerome Glisse <jglisse@redhat.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
2012-01-06 11:11:05 +08:00
|
|
|
/*
|
|
|
|
* VM
|
|
|
|
*/
|
|
|
|
/*
 * One GPU virtual address space; struct radeon_fpriv embeds one per open
 * file.
 * NOTE(review): member meanings are inferred from names - list is
 * presumably the link into radeon_vm_manager.lru_vm, va the list of
 * mappings in this address space, id the hardware VM id, last_pfn the
 * highest page frame number covered, pt/pt_gpu_addr the page table and its
 * GPU address, and sa_bo the sub-allocation backing the page table. Confirm
 * against the VM implementation.
 */
struct radeon_vm {
|
|
|
|
struct list_head list;
|
|
|
|
struct list_head va;
|
|
|
|
int id;
|
|
|
|
unsigned last_pfn;
|
|
|
|
u64 pt_gpu_addr;
|
|
|
|
u64 *pt;
|
|
|
|
struct radeon_sa_bo sa_bo;
|
|
|
|
struct mutex mutex;
|
|
|
|
/* last fence for cs using this vm */
|
|
|
|
struct radeon_fence *fence;
|
|
|
|
};
|
|
|
|
|
|
|
|
/*
 * Table of VM callbacks; struct radeon_vm_manager points at one through its
 * const funcs member.
 *   init / fini   device-wide setup and teardown
 *   bind / unbind attach a VM to the given id, and detach it
 *   tlb_flush     flush translations for one VM
 *   page_flags    map the generic flags word to a 32-bit value
 *                 (presumably the hardware page table entry flags)
 *   set_page      write the entry for page frame pfn with addr and flags
 * NOTE(review): the one-line descriptions are inferred from the callback
 * names and signatures - confirm against the per-ASIC implementations.
 */
struct radeon_vm_funcs {
|
|
|
|
int (*init)(struct radeon_device *rdev);
|
|
|
|
void (*fini)(struct radeon_device *rdev);
|
|
|
|
/* cs mutex must be locked for schedule_ib.
 * NOTE(review): this struct has no schedule_ib member, so this comment
 * may be stale - confirm which callbacks need the cs mutex. */
|
|
|
|
int (*bind)(struct radeon_device *rdev, struct radeon_vm *vm, int id);
|
|
|
|
void (*unbind)(struct radeon_device *rdev, struct radeon_vm *vm);
|
|
|
|
void (*tlb_flush)(struct radeon_device *rdev, struct radeon_vm *vm);
|
|
|
|
uint32_t (*page_flags)(struct radeon_device *rdev,
|
|
|
|
struct radeon_vm *vm,
|
|
|
|
uint32_t flags);
|
|
|
|
void (*set_page)(struct radeon_device *rdev, struct radeon_vm *vm,
|
|
|
|
unsigned pfn, uint64_t addr, uint32_t flags);
|
|
|
|
};
|
|
|
|
|
|
|
|
/*
 * Device-wide VM bookkeeping: a list of VMs (lru_vm), a 32-bit use bitmap,
 * a sub-allocator manager, and the callback table.
 * NOTE(review): use_bitmap presumably tracks which hardware VM ids are in
 * use (nvm is documented below as the number of VMIDs) and sa_manager
 * presumably provides the memory for page tables - confirm against the VM
 * implementation.
 */
struct radeon_vm_manager {
|
|
|
|
struct list_head lru_vm;
|
|
|
|
uint32_t use_bitmap;
|
|
|
|
struct radeon_sa_manager sa_manager;
|
|
|
|
uint32_t max_pfn;
|
|
|
|
/* fields constant after init */
|
|
|
|
const struct radeon_vm_funcs *funcs;
|
|
|
|
/* number of VMIDs */
|
|
|
|
unsigned nvm;
|
|
|
|
/* vram base address for page table entry */
|
|
|
|
u64 vram_base_offset;
|
2012-01-06 22:38:15 +08:00
|
|
|
/* is vm enabled? */
|
|
|
|
bool enabled;
|
drm/radeon: GPU virtual memory support v22
Virtual address space are per drm client (opener of /dev/drm).
Client are in charge of virtual address space, they need to
map bo into it by calling DRM_RADEON_GEM_VA ioctl.
First 16M of virtual address space is reserved by the kernel.
Once using 2 level page table we should be able to have a small
vram memory footprint for each pt (there would be one pt for all
gart, one for all vram and then one first level for each virtual
address space).
Plan include using the sub allocator for a common vm page table
area and using memcpy to copy vm page table in & out. Or use
a gart object and copy things in & out using dma.
v2: agd5f fixes:
- Add vram base offset for vram pages. The GPU physical address of a
vram page is FB_OFFSET + page offset. FB_OFFSET is 0 on discrete
cards and the physical bus address of the stolen memory on
integrated chips.
- VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR covers all vmid's >= 1
v3: agd5f:
- integrate with the semaphore/multi-ring stuff
v4:
- rebase on top ttm dma & multi-ring stuff
- userspace is now in charge of the address space
- no more specific cs vm ioctl, instead cs ioctl has a new
chunk
v5:
- properly handle mem == NULL case from move_notify callback
- fix the vm cleanup path
v6:
- fix update of page table to only happen on valid mem placement
v7:
- add tlb flush for each vm context
- add flags to define mapping property (readable, writeable, snooped)
- make ring id implicit from ib->fence->ring, up to each asic callback
to then do ring specific scheduling if vm ib scheduling function
v8:
- add query for ib limit and kernel reserved virtual space
- rename vm->size to max_pfn (maximum number of page)
- update gem_va ioctl to also allow unmap operation
- bump kernel version to allow userspace to query for vm support
v9:
- rebuild page table only when bind and incrementaly depending
on bo referenced by cs and that have been moved
- allow virtual address space to grow
- use sa allocator for vram page table
- return invalid when querying vm limit on non cayman GPU
- dump vm fault register on lockup
v10: agd5f:
- Move the vm schedule_ib callback to a standalone function, remove
the callback and use the existing ib_execute callback for VM IBs.
v11:
- rebase on top of lastest Linus
v12: agd5f:
- remove spurious backslash
- set IB vm_id to 0 in radeon_ib_get()
v13: agd5f:
- fix handling of RADEON_CHUNK_ID_FLAGS
v14:
- fix va destruction
- fix suspend resume
- forbid bo to have several different va in same vm
v15:
- rebase
v16:
- cleanup left over of vm init/fini
v17: agd5f:
- cs checker
v18: agd5f:
- reworks the CS ioctl to better support multiple rings and
VM. Rather than adding a new chunk id for VM, just re-use the
IB chunk id and add a new flags for VM mode. Also define additional
dwords for the flags chunk id to define the what ring we want to use
(gfx, compute, uvd, etc.) and the priority.
v19:
- fix cs fini in weird case of no ib
- semi working flush fix for ni
- rebase on top of sa allocator changes
v20: agd5f:
- further CS ioctl cleanups from Christian's comments
v21: agd5f:
- integrate CS checker improvements
v22: agd5f:
- final cleanups for release, only allow VM CS on cayman
Signed-off-by: Jerome Glisse <jglisse@redhat.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
2012-01-06 11:11:05 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
/*
|
|
|
|
* file private structure
|
|
|
|
*/
|
|
|
|
/*
 * Per-open-file driver data (the "file private structure" of the banner
 * above). At this revision it holds only the GPU virtual address space of
 * the file, embedded by value.
 */
struct radeon_fpriv {
|
|
|
|
struct radeon_vm vm;
|
|
|
|
};
|
|
|
|
|
2009-12-02 02:43:46 +08:00
|
|
|
/*
|
|
|
|
* R6xx+ IH ring
|
|
|
|
*/
|
|
|
|
/*
 * State of the R6xx+ interrupt handler (IH) ring.
 * NOTE(review): member meanings are inferred from names and from the
 * similarly named members of struct radeon_ring - ring_obj/ring/gpu_addr
 * are presumably the backing buffer object, its CPU mapping and its GPU
 * address; rptr/wptr the read and write positions; ptr_mask the wrap mask
 * for those positions. Unlike struct radeon_ring this struct is guarded by
 * a spinlock rather than a mutex. Confirm against the r600 IH code.
 */
struct r600_ih {
|
2009-11-20 21:29:23 +08:00
|
|
|
struct radeon_bo *ring_obj;
|
2009-12-02 02:43:46 +08:00
|
|
|
volatile uint32_t *ring;
|
|
|
|
unsigned rptr;
|
2011-10-13 19:19:22 +08:00
|
|
|
unsigned rptr_offs;
|
2009-12-02 02:43:46 +08:00
|
|
|
unsigned wptr;
|
|
|
|
unsigned wptr_old;
|
|
|
|
unsigned ring_size;
|
|
|
|
uint64_t gpu_addr;
|
|
|
|
uint32_t ptr_mask;
|
|
|
|
spinlock_t lock;
|
|
|
|
bool enabled;
|
|
|
|
};
|
|
|
|
|
2011-10-13 11:29:40 +08:00
|
|
|
/*
 * Table of callbacks used by the blit code; struct r600_blit embeds one by
 * value as its primitives member. Every callback takes the device and
 * returns nothing.
 * NOTE(review): each callback presumably emits the command-processor
 * packets for the state named by the member (render target, surface sync,
 * shaders, vertex and texture resources, scissors, draw, default state),
 * with one implementation per ASIC family - confirm against the r600 and
 * evergreen blit code.
 */
struct r600_blit_cp_primitives {
|
|
|
|
void (*set_render_target)(struct radeon_device *rdev, int format,
|
|
|
|
int w, int h, u64 gpu_addr);
|
|
|
|
void (*cp_set_surface_sync)(struct radeon_device *rdev,
|
|
|
|
u32 sync_type, u32 size,
|
|
|
|
u64 mc_addr);
|
|
|
|
void (*set_shaders)(struct radeon_device *rdev);
|
|
|
|
void (*set_vtx_resource)(struct radeon_device *rdev, u64 gpu_addr);
|
|
|
|
void (*set_tex_resource)(struct radeon_device *rdev,
|
|
|
|
int format, int w, int h, int pitch,
|
2011-10-22 22:07:09 +08:00
|
|
|
u64 gpu_addr, u32 size);
|
2011-10-13 11:29:40 +08:00
|
|
|
void (*set_scissors)(struct radeon_device *rdev, int x1, int y1,
|
|
|
|
int x2, int y2);
|
|
|
|
void (*draw_auto)(struct radeon_device *rdev);
|
|
|
|
void (*set_default_state)(struct radeon_device *rdev);
|
|
|
|
};
|
|
|
|
|
2009-09-08 08:10:24 +08:00
|
|
|
/*
 * State of the blit engine helper.
 * NOTE(review): member meanings are inferred from names - shader_obj and
 * shader_gpu_addr are presumably the buffer object holding the blit shaders
 * and state, with vs_offset, ps_offset and state_offset as offsets into it;
 * max_dim the largest width/height handled in one pass;
 * ring_size_common and ring_size_per_loop ring space estimates; and
 * vb_used/vb_total/vb_ib the vertex buffer fill level, capacity and the IB
 * providing it. Confirm against the r600 blit code.
 */
struct r600_blit {
|
2010-01-22 22:19:00 +08:00
|
|
|
struct mutex mutex;
|
2009-11-20 21:29:23 +08:00
|
|
|
struct radeon_bo *shader_obj;
|
2011-10-13 11:29:40 +08:00
|
|
|
struct r600_blit_cp_primitives primitives;
|
|
|
|
int max_dim;
|
|
|
|
int ring_size_common;
|
|
|
|
int ring_size_per_loop;
|
2009-09-08 08:10:24 +08:00
|
|
|
u64 shader_gpu_addr;
|
|
|
|
u32 vs_offset, ps_offset;
|
|
|
|
u32 state_offset;
|
|
|
|
u32 state_len;
|
|
|
|
u32 vb_used, vb_total;
|
|
|
|
struct radeon_ib *vb_ib;
|
|
|
|
};
|
|
|
|
|
2011-10-14 22:51:22 +08:00
|
|
|
void r600_blit_suspend(struct radeon_device *rdev);
|
|
|
|
|
2011-12-22 01:13:46 +08:00
|
|
|
int radeon_ib_get(struct radeon_device *rdev, int ring,
|
|
|
|
struct radeon_ib **ib, unsigned size);
|
2009-06-05 20:42:42 +08:00
|
|
|
void radeon_ib_free(struct radeon_device *rdev, struct radeon_ib **ib);
|
2011-12-22 01:13:47 +08:00
|
|
|
bool radeon_ib_try_free(struct radeon_device *rdev, struct radeon_ib *ib);
|
2009-06-05 20:42:42 +08:00
|
|
|
int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib);
|
|
|
|
int radeon_ib_pool_init(struct radeon_device *rdev);
|
|
|
|
void radeon_ib_pool_fini(struct radeon_device *rdev);
|
drm/radeon: introduce a sub allocator and convert ib pool to it v4
Somewhat specializaed sub-allocator designed to perform sub-allocation
for command buffer not only for current cs ioctl but for future command
submission ioctl as well. Patch also convert current ib pool to use
the sub allocator. Idea is that ib poll buffer can be share with other
command buffer submission not having 64K granularity.
v2 Harmonize pool handling and add suspend/resume callback to pin/unpin
sa bo (tested on rv280, rv370, r420, rv515, rv610, rv710, redwood, cayman,
rs480, rs690, rs880)
v3 Simplify allocator
v4 Fix radeon_ib_get error path to properly free fence
Signed-off-by: Jerome Glisse <jglisse@redhat.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
2011-11-16 00:48:34 +08:00
|
|
|
int radeon_ib_pool_start(struct radeon_device *rdev);
|
|
|
|
int radeon_ib_pool_suspend(struct radeon_device *rdev);
|
2009-06-05 20:42:42 +08:00
|
|
|
int radeon_ib_test(struct radeon_device *rdev);
|
|
|
|
/* Ring access between begin & end cannot sleep */
|
2011-10-23 18:56:27 +08:00
|
|
|
int radeon_ring_index(struct radeon_device *rdev, struct radeon_ring *cp);
|
|
|
|
void radeon_ring_free_size(struct radeon_device *rdev, struct radeon_ring *cp);
|
|
|
|
int radeon_ring_alloc(struct radeon_device *rdev, struct radeon_ring *cp, unsigned ndw);
|
|
|
|
int radeon_ring_lock(struct radeon_device *rdev, struct radeon_ring *cp, unsigned ndw);
|
|
|
|
void radeon_ring_commit(struct radeon_device *rdev, struct radeon_ring *cp);
|
|
|
|
void radeon_ring_unlock_commit(struct radeon_device *rdev, struct radeon_ring *cp);
|
|
|
|
void radeon_ring_unlock_undo(struct radeon_device *rdev, struct radeon_ring *cp);
|
|
|
|
int radeon_ring_test(struct radeon_device *rdev, struct radeon_ring *cp);
|
|
|
|
int radeon_ring_init(struct radeon_device *rdev, struct radeon_ring *cp, unsigned ring_size,
|
2011-11-18 03:25:56 +08:00
|
|
|
unsigned rptr_offs, unsigned rptr_reg, unsigned wptr_reg,
|
|
|
|
u32 ptr_reg_shift, u32 ptr_reg_mask, u32 nop);
|
2011-10-23 18:56:27 +08:00
|
|
|
void radeon_ring_fini(struct radeon_device *rdev, struct radeon_ring *cp);
|
2009-06-05 20:42:42 +08:00
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
* CS.
|
|
|
|
*/
|
|
|
|
/*
 * One relocation entry used by the command-stream (CS) code: it bundles a
 * GEM object pointer, the radeon buffer object and a radeon_bo_list node
 * together with a 32-bit handle and a 32-bit flags word.
 * NOTE(review): handle/flags presumably mirror the relocation record that
 * userspace submits -- confirm against the CS parser.
 */
struct radeon_cs_reloc {
|
|
|
|
struct drm_gem_object *gobj;
|
2009-11-20 21:29:23 +08:00
|
|
|
struct radeon_bo *robj;
|
|
|
|
struct radeon_bo_list lobj;
|
2009-06-05 20:42:42 +08:00
|
|
|
uint32_t handle;
|
|
|
|
uint32_t flags;
|
|
|
|
};
|
|
|
|
|
|
|
|
struct radeon_cs_chunk {
|
|
|
|
uint32_t chunk_id;
|
|
|
|
uint32_t length_dw;
|
drm/radeon: GPU virtual memory support v22
Virtual address space are per drm client (opener of /dev/drm).
Client are in charge of virtual address space, they need to
map bo into it by calling DRM_RADEON_GEM_VA ioctl.
First 16M of virtual address space is reserved by the kernel.
Once using 2 level page table we should be able to have a small
vram memory footprint for each pt (there would be one pt for all
gart, one for all vram and then one first level for each virtual
address space).
Plan include using the sub allocator for a common vm page table
area and using memcpy to copy vm page table in & out. Or use
a gart object and copy things in & out using dma.
v2: agd5f fixes:
- Add vram base offset for vram pages. The GPU physical address of a
vram page is FB_OFFSET + page offset. FB_OFFSET is 0 on discrete
cards and the physical bus address of the stolen memory on
integrated chips.
- VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR covers all vmid's >= 1
v3: agd5f:
- integrate with the semaphore/multi-ring stuff
v4:
- rebase on top ttm dma & multi-ring stuff
- userspace is now in charge of the address space
- no more specific cs vm ioctl, instead cs ioctl has a new
chunk
v5:
- properly handle mem == NULL case from move_notify callback
- fix the vm cleanup path
v6:
- fix update of page table to only happen on valid mem placement
v7:
- add tlb flush for each vm context
- add flags to define mapping property (readable, writeable, snooped)
- make ring id implicit from ib->fence->ring, up to each asic callback
to then do ring specific scheduling if vm ib scheduling function
v8:
- add query for ib limit and kernel reserved virtual space
- rename vm->size to max_pfn (maximum number of page)
- update gem_va ioctl to also allow unmap operation
- bump kernel version to allow userspace to query for vm support
v9:
- rebuild page table only when bind and incrementaly depending
on bo referenced by cs and that have been moved
- allow virtual address space to grow
- use sa allocator for vram page table
- return invalid when querying vm limit on non cayman GPU
- dump vm fault register on lockup
v10: agd5f:
- Move the vm schedule_ib callback to a standalone function, remove
the callback and use the existing ib_execute callback for VM IBs.
v11:
- rebase on top of lastest Linus
v12: agd5f:
- remove spurious backslash
- set IB vm_id to 0 in radeon_ib_get()
v13: agd5f:
- fix handling of RADEON_CHUNK_ID_FLAGS
v14:
- fix va destruction
- fix suspend resume
- forbid bo to have several different va in same vm
v15:
- rebase
v16:
- cleanup left over of vm init/fini
v17: agd5f:
- cs checker
v18: agd5f:
- reworks the CS ioctl to better support multiple rings and
VM. Rather than adding a new chunk id for VM, just re-use the
IB chunk id and add a new flags for VM mode. Also define additional
dwords for the flags chunk id to define the what ring we want to use
(gfx, compute, uvd, etc.) and the priority.
v19:
- fix cs fini in weird case of no ib
- semi working flush fix for ni
- rebase on top of sa allocator changes
v20: agd5f:
- further CS ioctl cleanups from Christian's comments
v21: agd5f:
- integrate CS checker improvements
v22: agd5f:
- final cleanups for release, only allow VM CS on cayman
Signed-off-by: Jerome Glisse <jglisse@redhat.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
2012-01-06 11:11:05 +08:00
|
|
|
int kpage_idx[2];
|
|
|
|
uint32_t *kpage[2];
|
2009-06-05 20:42:42 +08:00
|
|
|
uint32_t *kdata;
|
drm/radeon: GPU virtual memory support v22
Virtual address space are per drm client (opener of /dev/drm).
Client are in charge of virtual address space, they need to
map bo into it by calling DRM_RADEON_GEM_VA ioctl.
First 16M of virtual address space is reserved by the kernel.
Once using 2 level page table we should be able to have a small
vram memory footprint for each pt (there would be one pt for all
gart, one for all vram and then one first level for each virtual
address space).
Plan include using the sub allocator for a common vm page table
area and using memcpy to copy vm page table in & out. Or use
a gart object and copy things in & out using dma.
v2: agd5f fixes:
- Add vram base offset for vram pages. The GPU physical address of a
vram page is FB_OFFSET + page offset. FB_OFFSET is 0 on discrete
cards and the physical bus address of the stolen memory on
integrated chips.
- VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR covers all vmid's >= 1
v3: agd5f:
- integrate with the semaphore/multi-ring stuff
v4:
- rebase on top ttm dma & multi-ring stuff
- userspace is now in charge of the address space
- no more specific cs vm ioctl, instead cs ioctl has a new
chunk
v5:
- properly handle mem == NULL case from move_notify callback
- fix the vm cleanup path
v6:
- fix update of page table to only happen on valid mem placement
v7:
- add tlb flush for each vm context
- add flags to define mapping property (readable, writeable, snooped)
- make ring id implicit from ib->fence->ring, up to each asic callback
to then do ring specific scheduling if vm ib scheduling function
v8:
- add query for ib limit and kernel reserved virtual space
- rename vm->size to max_pfn (maximum number of page)
- update gem_va ioctl to also allow unmap operation
- bump kernel version to allow userspace to query for vm support
v9:
- rebuild page table only when bind and incrementaly depending
on bo referenced by cs and that have been moved
- allow virtual address space to grow
- use sa allocator for vram page table
- return invalid when querying vm limit on non cayman GPU
- dump vm fault register on lockup
v10: agd5f:
- Move the vm schedule_ib callback to a standalone function, remove
the callback and use the existing ib_execute callback for VM IBs.
v11:
- rebase on top of lastest Linus
v12: agd5f:
- remove spurious backslash
- set IB vm_id to 0 in radeon_ib_get()
v13: agd5f:
- fix handling of RADEON_CHUNK_ID_FLAGS
v14:
- fix va destruction
- fix suspend resume
- forbid bo to have several different va in same vm
v15:
- rebase
v16:
- cleanup left over of vm init/fini
v17: agd5f:
- cs checker
v18: agd5f:
- reworks the CS ioctl to better support multiple rings and
VM. Rather than adding a new chunk id for VM, just re-use the
IB chunk id and add a new flags for VM mode. Also define additional
dwords for the flags chunk id to define the what ring we want to use
(gfx, compute, uvd, etc.) and the priority.
v19:
- fix cs fini in weird case of no ib
- semi working flush fix for ni
- rebase on top of sa allocator changes
v20: agd5f:
- further CS ioctl cleanups from Christian's comments
v21: agd5f:
- integrate CS checker improvements
v22: agd5f:
- final cleanups for release, only allow VM CS on cayman
Signed-off-by: Jerome Glisse <jglisse@redhat.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
2012-01-06 11:11:05 +08:00
|
|
|
void __user *user_ptr;
|
|
|
|
int last_copied_page;
|
|
|
|
int last_page_index;
|
2009-06-05 20:42:42 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
struct radeon_cs_parser {
|
2010-01-18 20:01:36 +08:00
|
|
|
struct device *dev;
|
2009-06-05 20:42:42 +08:00
|
|
|
struct radeon_device *rdev;
|
|
|
|
struct drm_file *filp;
|
|
|
|
/* chunks */
|
|
|
|
unsigned nchunks;
|
|
|
|
struct radeon_cs_chunk *chunks;
|
|
|
|
uint64_t *chunks_array;
|
|
|
|
/* IB */
|
|
|
|
unsigned idx;
|
|
|
|
/* relocations */
|
|
|
|
unsigned nrelocs;
|
|
|
|
struct radeon_cs_reloc *relocs;
|
|
|
|
struct radeon_cs_reloc **relocs_ptr;
|
|
|
|
struct list_head validated;
|
2012-01-06 11:11:06 +08:00
|
|
|
bool sync_to_ring[RADEON_NUM_RINGS];
|
2009-06-05 20:42:42 +08:00
|
|
|
/* indices of various chunks */
|
|
|
|
int chunk_ib_idx;
|
|
|
|
int chunk_relocs_idx;
|
drm/radeon: GPU virtual memory support v22
Virtual address space are per drm client (opener of /dev/drm).
Client are in charge of virtual address space, they need to
map bo into it by calling DRM_RADEON_GEM_VA ioctl.
First 16M of virtual address space is reserved by the kernel.
Once using 2 level page table we should be able to have a small
vram memory footprint for each pt (there would be one pt for all
gart, one for all vram and then one first level for each virtual
address space).
Plan include using the sub allocator for a common vm page table
area and using memcpy to copy vm page table in & out. Or use
a gart object and copy things in & out using dma.
v2: agd5f fixes:
- Add vram base offset for vram pages. The GPU physical address of a
vram page is FB_OFFSET + page offset. FB_OFFSET is 0 on discrete
cards and the physical bus address of the stolen memory on
integrated chips.
- VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR covers all vmid's >= 1
v3: agd5f:
- integrate with the semaphore/multi-ring stuff
v4:
- rebase on top ttm dma & multi-ring stuff
- userspace is now in charge of the address space
- no more specific cs vm ioctl, instead cs ioctl has a new
chunk
v5:
- properly handle mem == NULL case from move_notify callback
- fix the vm cleanup path
v6:
- fix update of page table to only happen on valid mem placement
v7:
- add tlb flush for each vm context
- add flags to define mapping property (readable, writeable, snooped)
- make ring id implicit from ib->fence->ring, up to each asic callback
to then do ring specific scheduling if vm ib scheduling function
v8:
- add query for ib limit and kernel reserved virtual space
- rename vm->size to max_pfn (maximum number of page)
- update gem_va ioctl to also allow unmap operation
- bump kernel version to allow userspace to query for vm support
v9:
- rebuild page table only when bind and incrementaly depending
on bo referenced by cs and that have been moved
- allow virtual address space to grow
- use sa allocator for vram page table
- return invalid when querying vm limit on non cayman GPU
- dump vm fault register on lockup
v10: agd5f:
- Move the vm schedule_ib callback to a standalone function, remove
the callback and use the existing ib_execute callback for VM IBs.
v11:
- rebase on top of lastest Linus
v12: agd5f:
- remove spurious backslash
- set IB vm_id to 0 in radeon_ib_get()
v13: agd5f:
- fix handling of RADEON_CHUNK_ID_FLAGS
v14:
- fix va destruction
- fix suspend resume
- forbid bo to have several different va in same vm
v15:
- rebase
v16:
- cleanup left over of vm init/fini
v17: agd5f:
- cs checker
v18: agd5f:
- reworks the CS ioctl to better support multiple rings and
VM. Rather than adding a new chunk id for VM, just re-use the
IB chunk id and add a new flags for VM mode. Also define additional
dwords for the flags chunk id to define the what ring we want to use
(gfx, compute, uvd, etc.) and the priority.
v19:
- fix cs fini in weird case of no ib
- semi working flush fix for ni
- rebase on top of sa allocator changes
v20: agd5f:
- further CS ioctl cleanups from Christian's comments
v21: agd5f:
- integrate CS checker improvements
v22: agd5f:
- final cleanups for release, only allow VM CS on cayman
Signed-off-by: Jerome Glisse <jglisse@redhat.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
2012-01-06 11:11:05 +08:00
|
|
|
int chunk_flags_idx;
|
2009-06-05 20:42:42 +08:00
|
|
|
struct radeon_ib *ib;
|
|
|
|
void *track;
|
2009-09-08 08:10:24 +08:00
|
|
|
unsigned family;
|
2011-10-25 07:38:45 +08:00
|
|
|
int parser_error;
|
drm/radeon: GPU virtual memory support v22
Virtual address space are per drm client (opener of /dev/drm).
Client are in charge of virtual address space, they need to
map bo into it by calling DRM_RADEON_GEM_VA ioctl.
First 16M of virtual address space is reserved by the kernel.
Once using 2 level page table we should be able to have a small
vram memory footprint for each pt (there would be one pt for all
gart, one for all vram and then one first level for each virtual
address space).
Plan include using the sub allocator for a common vm page table
area and using memcpy to copy vm page table in & out. Or use
a gart object and copy things in & out using dma.
v2: agd5f fixes:
- Add vram base offset for vram pages. The GPU physical address of a
vram page is FB_OFFSET + page offset. FB_OFFSET is 0 on discrete
cards and the physical bus address of the stolen memory on
integrated chips.
- VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR covers all vmid's >= 1
v3: agd5f:
- integrate with the semaphore/multi-ring stuff
v4:
- rebase on top ttm dma & multi-ring stuff
- userspace is now in charge of the address space
- no more specific cs vm ioctl, instead cs ioctl has a new
chunk
v5:
- properly handle mem == NULL case from move_notify callback
- fix the vm cleanup path
v6:
- fix update of page table to only happen on valid mem placement
v7:
- add tlb flush for each vm context
- add flags to define mapping property (readable, writeable, snooped)
- make ring id implicit from ib->fence->ring, up to each asic callback
to then do ring specific scheduling if vm ib scheduling function
v8:
- add query for ib limit and kernel reserved virtual space
- rename vm->size to max_pfn (maximum number of page)
- update gem_va ioctl to also allow unmap operation
- bump kernel version to allow userspace to query for vm support
v9:
- rebuild page table only when bind and incrementaly depending
on bo referenced by cs and that have been moved
- allow virtual address space to grow
- use sa allocator for vram page table
- return invalid when querying vm limit on non cayman GPU
- dump vm fault register on lockup
v10: agd5f:
- Move the vm schedule_ib callback to a standalone function, remove
the callback and use the existing ib_execute callback for VM IBs.
v11:
- rebase on top of lastest Linus
v12: agd5f:
- remove spurious backslash
- set IB vm_id to 0 in radeon_ib_get()
v13: agd5f:
- fix handling of RADEON_CHUNK_ID_FLAGS
v14:
- fix va destruction
- fix suspend resume
- forbid bo to have several different va in same vm
v15:
- rebase
v16:
- cleanup left over of vm init/fini
v17: agd5f:
- cs checker
v18: agd5f:
- reworks the CS ioctl to better support multiple rings and
VM. Rather than adding a new chunk id for VM, just re-use the
IB chunk id and add a new flags for VM mode. Also define additional
dwords for the flags chunk id to define the what ring we want to use
(gfx, compute, uvd, etc.) and the priority.
v19:
- fix cs fini in weird case of no ib
- semi working flush fix for ni
- rebase on top of sa allocator changes
v20: agd5f:
- further CS ioctl cleanups from Christian's comments
v21: agd5f:
- integrate CS checker improvements
v22: agd5f:
- final cleanups for release, only allow VM CS on cayman
Signed-off-by: Jerome Glisse <jglisse@redhat.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
2012-01-06 11:11:05 +08:00
|
|
|
u32 cs_flags;
|
|
|
|
u32 ring;
|
|
|
|
s32 priority;
|
2009-06-05 20:42:42 +08:00
|
|
|
};
|
|
|
|
|
2009-09-23 14:56:27 +08:00
|
|
|
extern int radeon_cs_update_pages(struct radeon_cs_parser *p, int pg_idx);
|
|
|
|
extern int radeon_cs_finish_pages(struct radeon_cs_parser *p);
|
2011-10-14 07:08:47 +08:00
|
|
|
extern u32 radeon_get_ib_value(struct radeon_cs_parser *p, int idx);
|
2009-09-23 14:56:27 +08:00
|
|
|
|
2009-06-05 20:42:42 +08:00
|
|
|
/*
 * Descriptor of a single command-stream packet. A pointer to this structure
 * is what the radeon_packet0_check_t and radeon_packet3_check_t callbacks
 * receive as their pkt argument.
 * NOTE(review): the meaning of the individual fields is inferred from their
 * names only (idx = position in the IB, type = packet type, ...) -- confirm
 * against the packet parser. Note that count is the only signed member.
 */
struct radeon_cs_packet {
|
|
|
|
unsigned idx;
|
|
|
|
unsigned type;
|
|
|
|
unsigned reg;
|
|
|
|
unsigned opcode;
|
|
|
|
int count;
|
|
|
|
unsigned one_reg_wr;
|
|
|
|
};
|
|
|
|
|
|
|
|
typedef int (*radeon_packet0_check_t)(struct radeon_cs_parser *p,
|
|
|
|
struct radeon_cs_packet *pkt,
|
|
|
|
unsigned idx, unsigned reg);
|
|
|
|
typedef int (*radeon_packet3_check_t)(struct radeon_cs_parser *p,
|
|
|
|
struct radeon_cs_packet *pkt);
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
* AGP
|
|
|
|
*/
|
|
|
|
int radeon_agp_init(struct radeon_device *rdev);
|
2009-11-05 13:39:10 +08:00
|
|
|
void radeon_agp_resume(struct radeon_device *rdev);
|
2010-05-22 00:48:54 +08:00
|
|
|
void radeon_agp_suspend(struct radeon_device *rdev);
|
2009-06-05 20:42:42 +08:00
|
|
|
void radeon_agp_fini(struct radeon_device *rdev);
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Writeback
|
|
|
|
*/
|
|
|
|
/*
 * Writeback state: a buffer object, a volatile-qualified 32-bit pointer,
 * a 64-bit GPU address and two boolean switches.
 * NOTE(review): wb looks like the CPU mapping of wb_obj and gpu_addr its GPU
 * address; the RADEON_WB_* / R600_WB_* offsets presumably index into this
 * buffer -- confirm against the writeback init code.
 */
struct radeon_wb {
|
2009-11-20 21:29:23 +08:00
|
|
|
struct radeon_bo *wb_obj;
|
2009-06-05 20:42:42 +08:00
|
|
|
|
|
|
/* volatile: every access through this pointer is a real memory access */
volatile uint32_t *wb;
|
|
|
|
uint64_t gpu_addr;
|
2010-08-28 06:25:25 +08:00
|
|
|
bool enabled;
|
2010-09-04 17:04:34 +08:00
|
|
|
bool use_event;
|
2009-06-05 20:42:42 +08:00
|
|
|
};
|
|
|
|
|
2010-08-28 06:25:25 +08:00
|
|
|
#define RADEON_WB_SCRATCH_OFFSET 0
|
|
|
|
#define RADEON_WB_CP_RPTR_OFFSET 1024
|
2011-03-03 09:07:31 +08:00
|
|
|
#define RADEON_WB_CP1_RPTR_OFFSET 1280
|
|
|
|
#define RADEON_WB_CP2_RPTR_OFFSET 1536
|
2010-08-28 06:25:25 +08:00
|
|
|
#define R600_WB_IH_WPTR_OFFSET 2048
|
2010-09-04 17:04:34 +08:00
|
|
|
#define R600_WB_EVENT_OFFSET 3072
|
2010-08-28 06:25:25 +08:00
|
|
|
|
2009-07-14 03:04:08 +08:00
|
|
|
/**
|
|
|
|
* struct radeon_pm - power management data
|
|
|
|
* @max_bandwidth: maximum bandwidth the gpu has (MByte/s)
|
|
|
|
* @igp_sideport_mclk: sideport memory clock Mhz (rs690,rs740,rs780,rs880)
|
|
|
|
* @igp_system_mclk: system clock Mhz (rs690,rs740,rs780,rs880)
|
|
|
|
* @igp_ht_link_clk: ht link clock Mhz (rs690,rs740,rs780,rs880)
|
|
|
|
* @igp_ht_link_width: ht link width in bits (rs690,rs740,rs780,rs880)
|
|
|
|
* @k8_bandwidth: k8 bandwidth the gpu has (MByte/s) (IGP)
|
|
|
|
* @sideport_bandwidth: sideport bandwidth the gpu has (MByte/s) (IGP)
|
|
|
|
* @ht_bandwidth: ht bandwidth the gpu has (MByte/s) (IGP)
|
|
|
|
* @core_bandwidth: core GPU bandwidth the gpu has (MByte/s) (IGP)
|
2011-03-31 09:57:33 +08:00
|
|
|
* @sclk: GPU clock Mhz (core bandwidth depends on this clock)
|
2009-07-14 03:04:08 +08:00
|
|
|
* @needed_bandwidth: current bandwidth needs
|
|
|
|
*
|
|
|
|
* It keeps track of various data needed to take power management decisions.
|
2011-03-31 09:57:33 +08:00
|
|
|
* Bandwidth need is used to determine minimum clock of the GPU and memory.
|
2009-07-14 03:04:08 +08:00
|
|
|
* Equation between gpu/memory clock and available bandwidth is hw dependent
|
|
|
|
* (type of memory, bus size, efficiency, ...)
|
|
|
|
*/
|
2010-05-08 03:10:16 +08:00
|
|
|
|
|
|
|
/*
 * Selects which power-management scheme is in use; this is the type of
 * radeon_pm.pm_method ("selected pm method"). struct radeon_pm keeps one
 * group of fields for profile-based management and one for dynpm.
 */
enum radeon_pm_method {
|
|
|
|
PM_METHOD_PROFILE,
|
|
|
|
PM_METHOD_DYNPM,
|
|
|
|
};
|
|
|
|
|
|
|
|
enum radeon_dynpm_state {
|
|
|
|
DYNPM_STATE_DISABLED,
|
|
|
|
DYNPM_STATE_MINIMUM,
|
|
|
|
DYNPM_STATE_PAUSED,
|
DRM / radeon / KMS: Fix hibernation regression related to radeon PM (was: Re: [Regression, post-2.6.34] Hibernation broken on machines with radeon/KMS and r300)
There is a regression from 2.6.34 related to the recent radeon power
management changes, caused by attempting to cancel a delayed work
item that's never been scheduled. However, the code as is has some
other issues potentially leading to visible problems.
First, the mutex around cancel_delayed_work() in radeon_pm_suspend()
doesn't really serve any purpose, because cancel_delayed_work() only
tries to delete the work's timer. Moreover, it doesn't prevent the
work handler from running, so the handler can do some wrong things if
it wins the race and in that case it will rearm itself to do some
more wrong things going forward. So, I think it's better to wait for
the handler to return in case it's already been queued up for
execution. Also, it should be prevented from rearming itself in that
case.
Second, in radeon_set_pm_method() the cancel_delayed_work() is not
sufficient to prevent the work handler from running and queing up
itself for the next run (the failure scenario is that
cancel_delayed_work() returns 0, so the handler is run, it waits on
the mutex and then rearms itself after the mutex has been released),
so again the work handler should be prevented from rearming itself in
that case..
Finally, there's a potential deadlock in radeon_pm_fini(), because
cancel_delayed_work_sync() is called under rdev->pm.mutex, but the
work handler tries to acquire the same mutex (if it wins the race).
Fix the issues described above.
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Reviewed-by: Alex Deucher <alexdeucher@gmail.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
2010-06-18 07:02:27 +08:00
|
|
|
DYNPM_STATE_ACTIVE,
|
|
|
|
DYNPM_STATE_SUSPENDED,
|
2009-12-23 06:02:16 +08:00
|
|
|
};
|
2010-05-08 03:10:16 +08:00
|
|
|
/*
 * Action planned by the dynpm code; this is the type of
 * radeon_pm.dynpm_planned_action.
 * NOTE(review): the exact reclocking performed for each value is decided
 * elsewhere in the driver and is not visible from this header.
 */
enum radeon_dynpm_action {
|
|
|
|
DYNPM_ACTION_NONE,
|
|
|
|
DYNPM_ACTION_MINIMUM,
|
|
|
|
DYNPM_ACTION_DOWNCLOCK,
|
|
|
|
DYNPM_ACTION_UPCLOCK,
|
|
|
|
DYNPM_ACTION_DEFAULT
|
2009-12-23 06:02:16 +08:00
|
|
|
};
|
2009-12-29 02:58:44 +08:00
|
|
|
|
|
|
|
/*
 * Kind of voltage control described by a struct radeon_voltage (stored in
 * its type member). That structure groups its remaining fields under
 * "gpio voltage", "VDDC voltage" and "r6xx+ sw" comments, which line up
 * with the GPIO, VDDC and SW values here. VOLTAGE_NONE is explicitly 0.
 */
enum radeon_voltage_type {
|
|
|
|
VOLTAGE_NONE = 0,
|
|
|
|
VOLTAGE_GPIO,
|
|
|
|
VOLTAGE_VDDC,
|
|
|
|
VOLTAGE_SW
|
|
|
|
};
|
|
|
|
|
2009-12-24 02:21:58 +08:00
|
|
|
/*
 * Classification of a power state. Used as the type member of
 * struct radeon_power_state and as the ps_type argument of
 * radeon_pm_get_type_index().
 */
enum radeon_pm_state_type {
|
|
|
|
POWER_STATE_TYPE_DEFAULT,
|
|
|
|
POWER_STATE_TYPE_POWERSAVE,
|
|
|
|
POWER_STATE_TYPE_BATTERY,
|
|
|
|
POWER_STATE_TYPE_BALANCED,
|
|
|
|
POWER_STATE_TYPE_PERFORMANCE,
|
|
|
|
};
|
|
|
|
|
2010-05-08 03:10:16 +08:00
|
|
|
/*
 * Profile selected for profile-based power management; this is the type of
 * radeon_pm.profile.
 * NOTE(review): these values are distinct from the PM_PROFILE_*_IDX macros,
 * which index radeon_pm.profiles[]; how a profile type maps to an index is
 * not visible from this header.
 */
enum radeon_pm_profile_type {
|
|
|
|
PM_PROFILE_DEFAULT,
|
|
|
|
PM_PROFILE_AUTO,
|
|
|
|
PM_PROFILE_LOW,
|
2010-06-03 05:56:01 +08:00
|
|
|
PM_PROFILE_MID,
|
2010-05-08 03:10:16 +08:00
|
|
|
PM_PROFILE_HIGH,
|
|
|
|
};
|
|
|
|
|
|
|
|
#define PM_PROFILE_DEFAULT_IDX 0
|
|
|
|
#define PM_PROFILE_LOW_SH_IDX 1
|
2010-06-03 05:56:01 +08:00
|
|
|
#define PM_PROFILE_MID_SH_IDX 2
|
|
|
|
#define PM_PROFILE_HIGH_SH_IDX 3
|
|
|
|
#define PM_PROFILE_LOW_MH_IDX 4
|
|
|
|
#define PM_PROFILE_MID_MH_IDX 5
|
|
|
|
#define PM_PROFILE_HIGH_MH_IDX 6
|
|
|
|
#define PM_PROFILE_MAX 7
|
2010-05-08 03:10:16 +08:00
|
|
|
|
|
|
|
/*
 * One entry of the radeon_pm.profiles[PM_PROFILE_MAX] table: four indices,
 * one "ps" and one "cm" index for each of the dpms-off and dpms-on cases.
 * NOTE(review): "ps" presumably means power state and "cm" clock mode
 * (compare radeon_pm.requested_power_state_index and
 * requested_clock_mode_index) -- confirm against the PM code.
 */
struct radeon_pm_profile {
|
|
|
|
int dpms_off_ps_idx;
|
|
|
|
int dpms_on_ps_idx;
|
|
|
|
int dpms_off_cm_idx;
|
|
|
|
int dpms_on_cm_idx;
|
2009-12-24 03:28:05 +08:00
|
|
|
};
|
|
|
|
|
2010-07-03 00:58:16 +08:00
|
|
|
/*
 * Identifies the internal thermal controller; this is the type of
 * radeon_pm.int_thermal_type ("internal thermal controller on rv6xx+").
 * NOTE(review): each non-NONE value is presumably named after the ASIC
 * generation whose thermal sensor it selects -- confirm.
 */
enum radeon_int_thermal_type {
|
|
|
|
THERMAL_TYPE_NONE,
|
|
|
|
THERMAL_TYPE_RV6XX,
|
|
|
|
THERMAL_TYPE_RV770,
|
|
|
|
THERMAL_TYPE_EVERGREEN,
|
2010-11-23 06:56:32 +08:00
|
|
|
THERMAL_TYPE_SUMO,
|
2011-01-07 10:19:22 +08:00
|
|
|
THERMAL_TYPE_NI,
|
2010-07-03 00:58:16 +08:00
|
|
|
};
|
|
|
|
|
2009-12-29 02:58:44 +08:00
|
|
|
/*
 * Voltage description attached to a clock mode (embedded in
 * struct radeon_pm_clock_info as its voltage member). The type member says
 * which kind of control applies; the remaining fields are grouped by the
 * comments below into GPIO-, VDDC- and software-controlled settings.
 * NOTE(review): presumably only the group matching type is meaningful --
 * confirm against the code that fills this structure in.
 */
struct radeon_voltage {
|
|
|
|
enum radeon_voltage_type type;
|
|
|
|
/* gpio voltage */
|
|
|
|
struct radeon_gpio_rec gpio;
|
|
|
|
u32 delay; /* delay in usec from voltage drop to sclk change */
|
|
|
|
bool active_high; /* voltage drop is active when bit is high */
|
|
|
|
/* VDDC voltage */
|
|
|
|
u8 vddc_id; /* index into vddc voltage table */
|
|
|
|
u8 vddci_id; /* index into vddci voltage table */
|
|
|
|
bool vddci_enabled;
|
|
|
|
/* r6xx+ sw */
|
2011-04-13 02:49:24 +08:00
|
|
|
u16 voltage;
|
|
|
|
/* evergreen+ vddci */
|
|
|
|
u16 vddci;
|
2009-12-29 02:58:44 +08:00
|
|
|
};
|
|
|
|
|
2010-05-03 13:13:14 +08:00
|
|
|
/* clock mode flags */
|
|
|
|
#define RADEON_PM_MODE_NO_DISPLAY (1 << 0)
|
|
|
|
|
2009-12-29 02:58:44 +08:00
|
|
|
/*
 * One clock mode: a memory clock, an engine clock, the associated voltage
 * description and a flags word. struct radeon_power_state refers to these
 * through its clock_info and default_clock_mode pointers.
 * NOTE(review): flags presumably holds RADEON_PM_MODE_* bits ("clock mode
 * flags"); the clock units are not stated in this header -- confirm.
 */
struct radeon_pm_clock_info {
|
|
|
|
/* memory clock */
|
|
|
|
u32 mclk;
|
|
|
|
/* engine clock */
|
|
|
|
u32 sclk;
|
|
|
|
/* voltage info */
|
|
|
|
struct radeon_voltage voltage;
|
2010-05-03 13:13:14 +08:00
|
|
|
/* standardized clock flags */
|
2009-12-29 02:58:44 +08:00
|
|
|
u32 flags;
|
|
|
|
};
|
|
|
|
|
2010-04-23 02:03:55 +08:00
|
|
|
/* state flags */
|
2010-05-03 13:13:14 +08:00
|
|
|
#define RADEON_PM_STATE_SINGLE_DISPLAY_ONLY (1 << 0)
|
2010-04-23 02:03:55 +08:00
|
|
|
|
2009-12-29 02:58:44 +08:00
|
|
|
/*
 * One power state: its classification, a pointer to its clock modes with
 * the count of valid ones, a pointer to the default clock mode, flag words
 * and a PCIe lane count. struct radeon_pm points at these via power_state.
 * NOTE(review): clock_info is presumably an array of num_clock_modes
 * entries and flags presumably holds RADEON_PM_STATE_* bits -- confirm
 * against the code that builds the power-state table.
 */
struct radeon_power_state {
|
2009-12-24 02:21:58 +08:00
|
|
|
enum radeon_pm_state_type type;
|
2011-11-04 22:09:43 +08:00
|
|
|
struct radeon_pm_clock_info *clock_info;
|
2009-12-29 02:58:44 +08:00
|
|
|
/* number of valid clock modes in this power state */
|
|
|
|
int num_clock_modes;
|
|
|
|
struct radeon_pm_clock_info *default_clock_mode;
|
2010-04-23 02:03:55 +08:00
|
|
|
/* standardized state flags */
|
|
|
|
u32 flags;
|
2010-04-23 02:25:19 +08:00
|
|
|
u32 misc; /* vbios specific flags */
|
|
|
|
u32 misc2; /* vbios specific flags */
|
|
|
|
int pcie_lanes; /* pcie lanes */
|
2009-12-29 02:58:44 +08:00
|
|
|
};
|
|
|
|
|
2010-02-12 06:16:36 +08:00
|
|
|
/*
|
|
|
|
* Some modes are overclocked by very low value, accept them
|
|
|
|
*/
|
|
|
|
#define RADEON_MODE_OVERCLOCK_MARGIN 500 /* 5 MHz */
|
|
|
|
|
2009-07-14 03:04:08 +08:00
|
|
|
struct radeon_pm {
|
2009-12-23 06:02:16 +08:00
|
|
|
struct mutex mutex;
|
2010-04-23 02:03:55 +08:00
|
|
|
u32 active_crtcs;
|
|
|
|
int active_crtc_count;
|
2009-12-23 06:02:16 +08:00
|
|
|
int req_vblank;
|
2010-03-03 05:06:51 +08:00
|
|
|
bool vblank_sync;
|
2010-04-23 00:52:11 +08:00
|
|
|
bool gui_idle;
|
2009-07-14 03:04:08 +08:00
|
|
|
fixed20_12 max_bandwidth;
|
|
|
|
fixed20_12 igp_sideport_mclk;
|
|
|
|
fixed20_12 igp_system_mclk;
|
|
|
|
fixed20_12 igp_ht_link_clk;
|
|
|
|
fixed20_12 igp_ht_link_width;
|
|
|
|
fixed20_12 k8_bandwidth;
|
|
|
|
fixed20_12 sideport_bandwidth;
|
|
|
|
fixed20_12 ht_bandwidth;
|
|
|
|
fixed20_12 core_bandwidth;
|
|
|
|
fixed20_12 sclk;
|
2010-03-17 08:54:38 +08:00
|
|
|
fixed20_12 mclk;
|
2009-07-14 03:04:08 +08:00
|
|
|
fixed20_12 needed_bandwidth;
|
2011-02-03 07:42:03 +08:00
|
|
|
struct radeon_power_state *power_state;
|
2009-12-29 02:58:44 +08:00
|
|
|
/* number of valid power states */
|
|
|
|
int num_power_states;
|
2010-04-23 02:03:55 +08:00
|
|
|
int current_power_state_index;
|
|
|
|
int current_clock_mode_index;
|
|
|
|
int requested_power_state_index;
|
|
|
|
int requested_clock_mode_index;
|
|
|
|
int default_power_state_index;
|
|
|
|
u32 current_sclk;
|
|
|
|
u32 current_mclk;
|
2011-04-13 02:49:24 +08:00
|
|
|
u16 current_vddc;
|
|
|
|
u16 current_vddci;
|
2011-01-07 10:19:26 +08:00
|
|
|
u32 default_sclk;
|
|
|
|
u32 default_mclk;
|
2011-04-13 02:49:24 +08:00
|
|
|
u16 default_vddc;
|
|
|
|
u16 default_vddci;
|
2010-03-11 23:01:17 +08:00
|
|
|
struct radeon_i2c_chan *i2c_bus;
|
2010-05-08 03:10:16 +08:00
|
|
|
/* selected pm method */
|
|
|
|
enum radeon_pm_method pm_method;
|
|
|
|
/* dynpm power management */
|
|
|
|
struct delayed_work dynpm_idle_work;
|
|
|
|
enum radeon_dynpm_state dynpm_state;
|
|
|
|
enum radeon_dynpm_action dynpm_planned_action;
|
|
|
|
unsigned long dynpm_action_timeout;
|
|
|
|
bool dynpm_can_upclock;
|
|
|
|
bool dynpm_can_downclock;
|
|
|
|
/* profile-based power management */
|
|
|
|
enum radeon_pm_profile_type profile;
|
|
|
|
int profile_index;
|
|
|
|
struct radeon_pm_profile profiles[PM_PROFILE_MAX];
|
2010-07-03 00:58:16 +08:00
|
|
|
/* internal thermal controller on rv6xx+ */
|
|
|
|
enum radeon_int_thermal_type int_thermal_type;
|
|
|
|
struct device *int_hwmon_dev;
|
2009-07-14 03:04:08 +08:00
|
|
|
};
|
|
|
|
|
2011-11-04 22:09:41 +08:00
|
|
|
int radeon_pm_get_type_index(struct radeon_device *rdev,
|
|
|
|
enum radeon_pm_state_type ps_type,
|
|
|
|
int instance);
|
2009-06-05 20:42:42 +08:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Benchmarking
|
|
|
|
*/
|
2011-10-13 11:29:39 +08:00
|
|
|
void radeon_benchmark(struct radeon_device *rdev, int test_number);
|
2009-06-05 20:42:42 +08:00
|
|
|
|
|
|
|
|
2009-07-21 17:23:57 +08:00
|
|
|
/*
|
|
|
|
* Testing
|
|
|
|
*/
|
|
|
|
void radeon_test_moves(struct radeon_device *rdev);
|
2011-09-27 18:31:00 +08:00
|
|
|
void radeon_test_ring_sync(struct radeon_device *rdev,
|
2011-10-23 18:56:27 +08:00
|
|
|
struct radeon_ring *cpA,
|
|
|
|
struct radeon_ring *cpB);
|
2011-09-27 18:31:00 +08:00
|
|
|
void radeon_test_syncing(struct radeon_device *rdev);
|
2009-07-21 17:23:57 +08:00
|
|
|
|
|
|
|
|
2009-06-05 20:42:42 +08:00
|
|
|
/*
|
|
|
|
* Debugfs
|
|
|
|
*/
|
2011-10-24 20:54:54 +08:00
|
|
|
/*
 * A set of debugfs entries: a pointer to drm_info_list items plus a count.
 * The members have the same types as the files/nfiles arguments of
 * radeon_debugfs_add_files().
 * NOTE(review): presumably one of these records each registered batch of
 * files -- confirm against radeon_debugfs_add_files().
 */
struct radeon_debugfs {
|
|
|
|
struct drm_info_list *files;
|
|
|
|
unsigned num_files;
|
|
|
|
};
|
|
|
|
|
2009-06-05 20:42:42 +08:00
|
|
|
int radeon_debugfs_add_files(struct radeon_device *rdev,
|
|
|
|
struct drm_info_list *files,
|
|
|
|
unsigned nfiles);
|
|
|
|
int radeon_debugfs_fence_init(struct radeon_device *rdev);
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
* ASIC specific functions.
|
|
|
|
*/
|
|
|
|
struct radeon_asic {
|
2009-06-17 19:28:30 +08:00
|
|
|
int (*init)(struct radeon_device *rdev);
|
2009-09-08 08:10:24 +08:00
|
|
|
void (*fini)(struct radeon_device *rdev);
|
|
|
|
int (*resume)(struct radeon_device *rdev);
|
|
|
|
int (*suspend)(struct radeon_device *rdev);
|
2009-09-21 12:33:58 +08:00
|
|
|
void (*vga_set_state)(struct radeon_device *rdev, bool state);
|
2011-10-23 18:56:27 +08:00
|
|
|
bool (*gpu_is_lockup)(struct radeon_device *rdev, struct radeon_ring *cp);
|
2010-03-09 22:45:11 +08:00
|
|
|
int (*asic_reset)(struct radeon_device *rdev);
|
2009-06-05 20:42:42 +08:00
|
|
|
void (*gart_tlb_flush)(struct radeon_device *rdev);
|
|
|
|
int (*gart_set_page)(struct radeon_device *rdev, int i, uint64_t addr);
|
|
|
|
int (*cp_init)(struct radeon_device *rdev, unsigned ring_size);
|
|
|
|
void (*cp_fini)(struct radeon_device *rdev);
|
|
|
|
void (*cp_disable)(struct radeon_device *rdev);
|
|
|
|
void (*ring_start)(struct radeon_device *rdev);
|
2011-10-20 01:02:21 +08:00
|
|
|
|
|
|
|
struct {
|
|
|
|
void (*ib_execute)(struct radeon_device *rdev, struct radeon_ib *ib);
|
drm/radeon: GPU virtual memory support v22
Virtual address space are per drm client (opener of /dev/drm).
Client are in charge of virtual address space, they need to
map bo into it by calling DRM_RADEON_GEM_VA ioctl.
First 16M of virtual address space is reserved by the kernel.
Once using 2 level page table we should be able to have a small
vram memory footprint for each pt (there would be one pt for all
gart, one for all vram and then one first level for each virtual
address space).
Plan include using the sub allocator for a common vm page table
area and using memcpy to copy vm page table in & out. Or use
a gart object and copy things in & out using dma.
v2: agd5f fixes:
- Add vram base offset for vram pages. The GPU physical address of a
vram page is FB_OFFSET + page offset. FB_OFFSET is 0 on discrete
cards and the physical bus address of the stolen memory on
integrated chips.
- VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR covers all vmid's >= 1
v3: agd5f:
- integrate with the semaphore/multi-ring stuff
v4:
- rebase on top ttm dma & multi-ring stuff
- userspace is now in charge of the address space
- no more specific cs vm ioctl, instead cs ioctl has a new
chunk
v5:
- properly handle mem == NULL case from move_notify callback
- fix the vm cleanup path
v6:
- fix update of page table to only happen on valid mem placement
v7:
- add tlb flush for each vm context
- add flags to define mapping property (readable, writeable, snooped)
- make ring id implicit from ib->fence->ring, up to each asic callback
to then do ring specific scheduling if vm ib scheduling function
v8:
- add query for ib limit and kernel reserved virtual space
- rename vm->size to max_pfn (maximum number of page)
- update gem_va ioctl to also allow unmap operation
- bump kernel version to allow userspace to query for vm support
v9:
- rebuild page table only when bind and incrementaly depending
on bo referenced by cs and that have been moved
- allow virtual address space to grow
- use sa allocator for vram page table
- return invalid when querying vm limit on non cayman GPU
- dump vm fault register on lockup
v10: agd5f:
- Move the vm schedule_ib callback to a standalone function, remove
the callback and use the existing ib_execute callback for VM IBs.
v11:
- rebase on top of lastest Linus
v12: agd5f:
- remove spurious backslash
- set IB vm_id to 0 in radeon_ib_get()
v13: agd5f:
- fix handling of RADEON_CHUNK_ID_FLAGS
v14:
- fix va destruction
- fix suspend resume
- forbid bo to have several different va in same vm
v15:
- rebase
v16:
- cleanup left over of vm init/fini
v17: agd5f:
- cs checker
v18: agd5f:
- reworks the CS ioctl to better support multiple rings and
VM. Rather than adding a new chunk id for VM, just re-use the
IB chunk id and add a new flags for VM mode. Also define additional
dwords for the flags chunk id to define the what ring we want to use
(gfx, compute, uvd, etc.) and the priority.
v19:
- fix cs fini in weird case of no ib
- semi working flush fix for ni
- rebase on top of sa allocator changes
v20: agd5f:
- further CS ioctl cleanups from Christian's comments
v21: agd5f:
- integrate CS checker improvements
v22: agd5f:
- final cleanups for release, only allow VM CS on cayman
Signed-off-by: Jerome Glisse <jglisse@redhat.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
2012-01-06 11:11:05 +08:00
|
|
|
int (*ib_parse)(struct radeon_device *rdev, struct radeon_ib *ib);
|
2011-10-20 01:02:21 +08:00
|
|
|
void (*emit_fence)(struct radeon_device *rdev, struct radeon_fence *fence);
|
2011-10-23 18:56:27 +08:00
|
|
|
void (*emit_semaphore)(struct radeon_device *rdev, struct radeon_ring *cp,
|
2011-10-20 01:02:21 +08:00
|
|
|
struct radeon_semaphore *semaphore, bool emit_wait);
|
|
|
|
} ring[RADEON_NUM_RINGS];
|
|
|
|
|
2011-10-23 18:56:27 +08:00
|
|
|
int (*ring_test)(struct radeon_device *rdev, struct radeon_ring *cp);
|
2009-06-05 20:42:42 +08:00
|
|
|
int (*irq_set)(struct radeon_device *rdev);
|
|
|
|
int (*irq_process)(struct radeon_device *rdev);
|
2009-08-13 17:10:51 +08:00
|
|
|
u32 (*get_vblank_counter)(struct radeon_device *rdev, int crtc);
|
2009-06-05 20:42:42 +08:00
|
|
|
int (*cs_parse)(struct radeon_cs_parser *p);
|
|
|
|
int (*copy_blit)(struct radeon_device *rdev,
|
|
|
|
uint64_t src_offset,
|
|
|
|
uint64_t dst_offset,
|
2011-09-17 00:04:08 +08:00
|
|
|
unsigned num_gpu_pages,
|
2009-06-05 20:42:42 +08:00
|
|
|
struct radeon_fence *fence);
|
|
|
|
int (*copy_dma)(struct radeon_device *rdev,
|
|
|
|
uint64_t src_offset,
|
|
|
|
uint64_t dst_offset,
|
2011-09-17 00:04:08 +08:00
|
|
|
unsigned num_gpu_pages,
|
2009-06-05 20:42:42 +08:00
|
|
|
struct radeon_fence *fence);
|
|
|
|
int (*copy)(struct radeon_device *rdev,
|
|
|
|
uint64_t src_offset,
|
|
|
|
uint64_t dst_offset,
|
2011-09-17 00:04:08 +08:00
|
|
|
unsigned num_gpu_pages,
|
2009-06-05 20:42:42 +08:00
|
|
|
struct radeon_fence *fence);
|
2009-11-03 07:53:02 +08:00
|
|
|
uint32_t (*get_engine_clock)(struct radeon_device *rdev);
|
2009-06-05 20:42:42 +08:00
|
|
|
void (*set_engine_clock)(struct radeon_device *rdev, uint32_t eng_clock);
|
2009-11-03 07:53:02 +08:00
|
|
|
uint32_t (*get_memory_clock)(struct radeon_device *rdev);
|
2009-06-05 20:42:42 +08:00
|
|
|
void (*set_memory_clock)(struct radeon_device *rdev, uint32_t mem_clock);
|
2009-12-23 23:07:50 +08:00
|
|
|
int (*get_pcie_lanes)(struct radeon_device *rdev);
|
2009-06-05 20:42:42 +08:00
|
|
|
void (*set_pcie_lanes)(struct radeon_device *rdev, int lanes);
|
|
|
|
void (*set_clock_gating)(struct radeon_device *rdev, int enable);
|
2009-06-24 07:48:08 +08:00
|
|
|
int (*set_surface_reg)(struct radeon_device *rdev, int reg,
|
|
|
|
uint32_t tiling_flags, uint32_t pitch,
|
|
|
|
uint32_t offset, uint32_t obj_size);
|
2010-03-12 05:19:16 +08:00
|
|
|
void (*clear_surface_reg)(struct radeon_device *rdev, int reg);
|
2009-07-14 03:04:08 +08:00
|
|
|
void (*bandwidth_update)(struct radeon_device *rdev);
|
2009-12-05 04:26:55 +08:00
|
|
|
void (*hpd_init)(struct radeon_device *rdev);
|
|
|
|
void (*hpd_fini)(struct radeon_device *rdev);
|
|
|
|
bool (*hpd_sense)(struct radeon_device *rdev, enum radeon_hpd_id hpd);
|
|
|
|
void (*hpd_set_polarity)(struct radeon_device *rdev, enum radeon_hpd_id hpd);
|
2010-02-05 03:36:39 +08:00
|
|
|
/* ioctl hw specific callback. Some hw might want to perform special
|
|
|
|
* operation on specific ioctl. For instance on wait idle some hw
|
|
|
|
* might want to perform an HDP flush through MMIO as it seems that
|
|
|
|
* some R6XX/R7XX hw doesn't take HDP flush into account if programmed
|
|
|
|
* through ring.
|
|
|
|
*/
|
|
|
|
void (*ioctl_wait_idle)(struct radeon_device *rdev, struct radeon_bo *bo);
|
2010-04-23 00:39:58 +08:00
|
|
|
bool (*gui_idle)(struct radeon_device *rdev);
|
2010-05-08 03:10:16 +08:00
|
|
|
/* power management */
|
2010-04-24 05:57:27 +08:00
|
|
|
void (*pm_misc)(struct radeon_device *rdev);
|
|
|
|
void (*pm_prepare)(struct radeon_device *rdev);
|
|
|
|
void (*pm_finish)(struct radeon_device *rdev);
|
2010-05-08 03:10:16 +08:00
|
|
|
void (*pm_init_profile)(struct radeon_device *rdev);
|
|
|
|
void (*pm_get_dynpm_state)(struct radeon_device *rdev);
|
2010-11-21 23:59:01 +08:00
|
|
|
/* pageflipping */
|
|
|
|
void (*pre_page_flip)(struct radeon_device *rdev, int crtc);
|
|
|
|
u32 (*page_flip)(struct radeon_device *rdev, int crtc, u64 crtc_base);
|
|
|
|
void (*post_page_flip)(struct radeon_device *rdev, int crtc);
|
2009-06-05 20:42:42 +08:00
|
|
|
};
|
|
|
|
|
2009-09-11 21:55:33 +08:00
|
|
|
/*
|
|
|
|
* Asic structures
|
|
|
|
*/
|
2010-03-09 22:45:10 +08:00
|
|
|
/*
 * Lockup-detection bookkeeping embedded in every per-family ASIC struct
 * below as the member "lockup".
 * NOTE(review): from the field names this looks like a jiffies timestamp
 * plus the CP read pointer sampled at that time -- confirm against the
 * code that updates it.
 */
struct r100_gpu_lockup {
|
|
|
|
unsigned long last_jiffies;
|
|
|
|
u32 last_cp_rptr;
|
|
|
|
};
|
|
|
|
|
2009-09-01 13:25:57 +08:00
|
|
|
/*
 * R100-family member of union radeon_asic_config: a pointer to a constant
 * table (reg_safe_bm) with its size, a hdp_cntl value and the lockup
 * bookkeeping.
 * NOTE(review): reg_safe_bm is presumably the bitmap of registers that
 * userspace command streams may write -- confirm against the CS checker.
 */
struct r100_asic {
|
2010-03-09 22:45:10 +08:00
|
|
|
const unsigned *reg_safe_bm;
|
|
|
|
unsigned reg_safe_bm_size;
|
|
|
|
u32 hdp_cntl;
|
|
|
|
struct r100_gpu_lockup lockup;
|
2009-09-01 13:25:57 +08:00
|
|
|
};
|
|
|
|
|
2009-09-11 21:55:33 +08:00
|
|
|
/*
 * R300-family member of union radeon_asic_config.  Same fields as
 * struct r100_asic plus resync_scratch.
 * NOTE(review): resync_scratch looks like a scratch register id/offset --
 * confirm against its users.
 */
struct r300_asic {
|
2010-03-09 22:45:10 +08:00
|
|
|
const unsigned *reg_safe_bm;
|
|
|
|
unsigned reg_safe_bm_size;
|
|
|
|
u32 resync_scratch;
|
|
|
|
u32 hdp_cntl;
|
|
|
|
struct r100_gpu_lockup lockup;
|
2009-09-11 21:55:33 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
/*
 * R600-family member of union radeon_asic_config: hardware limits (max_*),
 * shader export / instruction sizes (sx_*, sq_*), tiling parameters
 * (tiling_*, tile_config), backend_map and the lockup bookkeeping.
 * NOTE(review): units and valid ranges are not visible here -- confirm
 * against the code that fills this struct.
 */
struct r600_asic {
|
2010-03-09 22:45:10 +08:00
|
|
|
unsigned max_pipes;
|
|
|
|
unsigned max_tile_pipes;
|
|
|
|
unsigned max_simds;
|
|
|
|
unsigned max_backends;
|
|
|
|
unsigned max_gprs;
|
|
|
|
unsigned max_threads;
|
|
|
|
unsigned max_stack_entries;
|
|
|
|
unsigned max_hw_contexts;
|
|
|
|
unsigned max_gs_threads;
|
|
|
|
unsigned sx_max_export_size;
|
|
|
|
unsigned sx_max_export_pos_size;
|
|
|
|
unsigned sx_max_export_smx_size;
|
|
|
|
unsigned sq_num_cf_insts;
|
|
|
|
unsigned tiling_nbanks;
|
|
|
|
unsigned tiling_npipes;
|
|
|
|
unsigned tiling_group_size;
|
2010-06-05 01:10:12 +08:00
|
|
|
unsigned tile_config;
|
2011-07-16 03:53:52 +08:00
|
|
|
unsigned backend_map;
|
2010-03-09 22:45:10 +08:00
|
|
|
struct r100_gpu_lockup lockup;
|
2009-09-11 21:55:33 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
/*
 * RV770-family member of union radeon_asic_config.  Same layout idea as
 * struct r600_asic with additional sx_num_of_sets and sc_* FIFO size
 * fields.
 * NOTE(review): the member sc_earlyz_tile_fifo_fize is spelled "fize" here
 * while struct evergreen_asic and struct cayman_asic spell the equivalent
 * member "..._fifo_size".  It is part of the struct's interface, so any
 * rename has to be done together with all users.
 */
struct rv770_asic {
|
2010-03-09 22:45:10 +08:00
|
|
|
unsigned max_pipes;
|
|
|
|
unsigned max_tile_pipes;
|
|
|
|
unsigned max_simds;
|
|
|
|
unsigned max_backends;
|
|
|
|
unsigned max_gprs;
|
|
|
|
unsigned max_threads;
|
|
|
|
unsigned max_stack_entries;
|
|
|
|
unsigned max_hw_contexts;
|
|
|
|
unsigned max_gs_threads;
|
|
|
|
unsigned sx_max_export_size;
|
|
|
|
unsigned sx_max_export_pos_size;
|
|
|
|
unsigned sx_max_export_smx_size;
|
|
|
|
unsigned sq_num_cf_insts;
|
|
|
|
unsigned sx_num_of_sets;
|
|
|
|
unsigned sc_prim_fifo_size;
|
|
|
|
unsigned sc_hiz_tile_fifo_size;
|
|
|
|
unsigned sc_earlyz_tile_fifo_fize;
|
|
|
|
unsigned tiling_nbanks;
|
|
|
|
unsigned tiling_npipes;
|
|
|
|
unsigned tiling_group_size;
|
2010-06-05 01:10:12 +08:00
|
|
|
unsigned tile_config;
|
2011-07-16 03:53:52 +08:00
|
|
|
unsigned backend_map;
|
2010-03-09 22:45:10 +08:00
|
|
|
struct r100_gpu_lockup lockup;
|
2009-09-11 21:55:33 +08:00
|
|
|
};
|
|
|
|
|
2010-03-25 01:33:47 +08:00
|
|
|
/*
 * Evergreen-family member of union radeon_asic_config: num_ses followed by
 * the same kind of max_*, sx_*, sq_*, sc_* and tiling fields as
 * struct rv770_asic, plus tile_config, backend_map and the lockup
 * bookkeeping.
 * NOTE(review): num_ses presumably counts shader engines -- confirm.
 */
struct evergreen_asic {
|
|
|
|
unsigned num_ses;
|
|
|
|
unsigned max_pipes;
|
|
|
|
unsigned max_tile_pipes;
|
|
|
|
unsigned max_simds;
|
|
|
|
unsigned max_backends;
|
|
|
|
unsigned max_gprs;
|
|
|
|
unsigned max_threads;
|
|
|
|
unsigned max_stack_entries;
|
|
|
|
unsigned max_hw_contexts;
|
|
|
|
unsigned max_gs_threads;
|
|
|
|
unsigned sx_max_export_size;
|
|
|
|
unsigned sx_max_export_pos_size;
|
|
|
|
unsigned sx_max_export_smx_size;
|
|
|
|
unsigned sq_num_cf_insts;
|
|
|
|
unsigned sx_num_of_sets;
|
|
|
|
unsigned sc_prim_fifo_size;
|
|
|
|
unsigned sc_hiz_tile_fifo_size;
|
|
|
|
unsigned sc_earlyz_tile_fifo_size;
|
|
|
|
unsigned tiling_nbanks;
|
|
|
|
unsigned tiling_npipes;
|
|
|
|
unsigned tiling_group_size;
|
2010-06-05 01:10:12 +08:00
|
|
|
unsigned tile_config;
|
2011-07-16 03:53:52 +08:00
|
|
|
unsigned backend_map;
|
2010-12-22 05:05:39 +08:00
|
|
|
struct r100_gpu_lockup lockup;
|
2010-03-25 01:33:47 +08:00
|
|
|
};
|
|
|
|
|
2011-03-03 09:07:29 +08:00
|
|
|
/*
 * Cayman-family member of union radeon_asic_config.  The members are laid
 * out in three groups separated by blank lines: max_* / sx_* / sq_* / sc_*
 * limits, then num_* / backend / mem_* / tile-size values, then
 * tile_config and the lockup bookkeeping.
 * NOTE(review): the max_* vs num_* split looks like "hardware maximum" vs
 * "actually enabled" -- confirm against the code that fills this struct.
 */
struct cayman_asic {
|
|
|
|
unsigned max_shader_engines;
|
|
|
|
unsigned max_pipes_per_simd;
|
|
|
|
unsigned max_tile_pipes;
|
|
|
|
unsigned max_simds_per_se;
|
|
|
|
unsigned max_backends_per_se;
|
|
|
|
unsigned max_texture_channel_caches;
|
|
|
|
unsigned max_gprs;
|
|
|
|
unsigned max_threads;
|
|
|
|
unsigned max_gs_threads;
|
|
|
|
unsigned max_stack_entries;
|
|
|
|
unsigned sx_num_of_sets;
|
|
|
|
unsigned sx_max_export_size;
|
|
|
|
unsigned sx_max_export_pos_size;
|
|
|
|
unsigned sx_max_export_smx_size;
|
|
|
|
unsigned max_hw_contexts;
|
|
|
|
unsigned sq_num_cf_insts;
|
|
|
|
unsigned sc_prim_fifo_size;
|
|
|
|
unsigned sc_hiz_tile_fifo_size;
|
|
|
|
unsigned sc_earlyz_tile_fifo_size;
|
|
|
|
|
|
|
|
unsigned num_shader_engines;
|
|
|
|
unsigned num_shader_pipes_per_simd;
|
|
|
|
unsigned num_tile_pipes;
|
|
|
|
unsigned num_simds_per_se;
|
|
|
|
unsigned num_backends_per_se;
|
|
|
|
unsigned backend_disable_mask_per_asic;
|
|
|
|
unsigned backend_map;
|
|
|
|
unsigned num_texture_channel_caches;
|
|
|
|
unsigned mem_max_burst_length_bytes;
|
|
|
|
unsigned mem_row_size_in_kb;
|
|
|
|
unsigned shader_engine_tile_size;
|
|
|
|
unsigned num_gpus;
|
|
|
|
unsigned multi_gpu_tile_size;
|
|
|
|
|
|
|
|
unsigned tile_config;
|
|
|
|
struct r100_gpu_lockup lockup;
|
|
|
|
};
|
|
|
|
|
2009-06-17 19:28:30 +08:00
|
|
|
/*
 * Per-family ASIC configuration.  Exactly one member is meaningful for a
 * given device; struct radeon_device embeds this union as "config".
 */
union radeon_asic_config {
|
|
|
|
struct r300_asic r300;
|
2009-09-01 13:25:57 +08:00
|
|
|
struct r100_asic r100;
|
2009-09-08 08:10:24 +08:00
|
|
|
struct r600_asic r600;
|
|
|
|
struct rv770_asic rv770;
|
2010-03-25 01:33:47 +08:00
|
|
|
struct evergreen_asic evergreen;
|
2011-03-03 09:07:29 +08:00
|
|
|
struct cayman_asic cayman;
|
2009-06-17 19:28:30 +08:00
|
|
|
};
|
|
|
|
|
2010-03-12 05:19:14 +08:00
|
|
|
/*
|
|
|
|
* asic initialization from radeon_asic.c
|
|
|
|
*/
|
|
|
|
void radeon_agp_disable(struct radeon_device *rdev);
|
|
|
|
int radeon_asic_init(struct radeon_device *rdev);
|
|
|
|
|
2009-06-05 20:42:42 +08:00
|
|
|
|
|
|
|
/*
|
|
|
|
* IOCTL.
|
|
|
|
*/
|
|
|
|
int radeon_gem_info_ioctl(struct drm_device *dev, void *data,
|
|
|
|
struct drm_file *filp);
|
|
|
|
int radeon_gem_create_ioctl(struct drm_device *dev, void *data,
|
|
|
|
struct drm_file *filp);
|
|
|
|
int radeon_gem_pin_ioctl(struct drm_device *dev, void *data,
|
|
|
|
struct drm_file *file_priv);
|
|
|
|
int radeon_gem_unpin_ioctl(struct drm_device *dev, void *data,
|
|
|
|
struct drm_file *file_priv);
|
|
|
|
int radeon_gem_pwrite_ioctl(struct drm_device *dev, void *data,
|
|
|
|
struct drm_file *file_priv);
|
|
|
|
int radeon_gem_pread_ioctl(struct drm_device *dev, void *data,
|
|
|
|
struct drm_file *file_priv);
|
|
|
|
int radeon_gem_set_domain_ioctl(struct drm_device *dev, void *data,
|
|
|
|
struct drm_file *filp);
|
|
|
|
int radeon_gem_mmap_ioctl(struct drm_device *dev, void *data,
|
|
|
|
struct drm_file *filp);
|
|
|
|
int radeon_gem_busy_ioctl(struct drm_device *dev, void *data,
|
|
|
|
struct drm_file *filp);
|
|
|
|
int radeon_gem_wait_idle_ioctl(struct drm_device *dev, void *data,
|
|
|
|
struct drm_file *filp);
|
drm/radeon: GPU virtual memory support v22
Virtual address space are per drm client (opener of /dev/drm).
Client are in charge of virtual address space, they need to
map bo into it by calling DRM_RADEON_GEM_VA ioctl.
First 16M of virtual address space is reserved by the kernel.
Once using 2 level page table we should be able to have a small
vram memory footprint for each pt (there would be one pt for all
gart, one for all vram and then one first level for each virtual
address space).
Plan include using the sub allocator for a common vm page table
area and using memcpy to copy vm page table in & out. Or use
a gart object and copy things in & out using dma.
v2: agd5f fixes:
- Add vram base offset for vram pages. The GPU physical address of a
vram page is FB_OFFSET + page offset. FB_OFFSET is 0 on discrete
cards and the physical bus address of the stolen memory on
integrated chips.
- VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR covers all vmid's >= 1
v3: agd5f:
- integrate with the semaphore/multi-ring stuff
v4:
- rebase on top ttm dma & multi-ring stuff
- userspace is now in charge of the address space
- no more specific cs vm ioctl, instead cs ioctl has a new
chunk
v5:
- properly handle mem == NULL case from move_notify callback
- fix the vm cleanup path
v6:
- fix update of page table to only happen on valid mem placement
v7:
- add tlb flush for each vm context
- add flags to define mapping property (readable, writeable, snooped)
- make ring id implicit from ib->fence->ring, up to each asic callback
to then do ring specific scheduling if vm ib scheduling function
v8:
- add query for ib limit and kernel reserved virtual space
- rename vm->size to max_pfn (maximum number of page)
- update gem_va ioctl to also allow unmap operation
- bump kernel version to allow userspace to query for vm support
v9:
- rebuild page table only when bind and incrementaly depending
on bo referenced by cs and that have been moved
- allow virtual address space to grow
- use sa allocator for vram page table
- return invalid when querying vm limit on non cayman GPU
- dump vm fault register on lockup
v10: agd5f:
- Move the vm schedule_ib callback to a standalone function, remove
the callback and use the existing ib_execute callback for VM IBs.
v11:
- rebase on top of lastest Linus
v12: agd5f:
- remove spurious backslash
- set IB vm_id to 0 in radeon_ib_get()
v13: agd5f:
- fix handling of RADEON_CHUNK_ID_FLAGS
v14:
- fix va destruction
- fix suspend resume
- forbid bo to have several different va in same vm
v15:
- rebase
v16:
- cleanup left over of vm init/fini
v17: agd5f:
- cs checker
v18: agd5f:
- reworks the CS ioctl to better support multiple rings and
VM. Rather than adding a new chunk id for VM, just re-use the
IB chunk id and add a new flags for VM mode. Also define additional
dwords for the flags chunk id to define the what ring we want to use
(gfx, compute, uvd, etc.) and the priority.
v19:
- fix cs fini in weird case of no ib
- semi working flush fix for ni
- rebase on top of sa allocator changes
v20: agd5f:
- further CS ioctl cleanups from Christian's comments
v21: agd5f:
- integrate CS checker improvements
v22: agd5f:
- final cleanups for release, only allow VM CS on cayman
Signed-off-by: Jerome Glisse <jglisse@redhat.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
2012-01-06 11:11:05 +08:00
|
|
|
int radeon_gem_va_ioctl(struct drm_device *dev, void *data,
|
|
|
|
struct drm_file *filp);
|
2009-06-05 20:42:42 +08:00
|
|
|
int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp);
|
2009-06-24 07:48:08 +08:00
|
|
|
int radeon_gem_set_tiling_ioctl(struct drm_device *dev, void *data,
|
|
|
|
struct drm_file *filp);
|
|
|
|
int radeon_gem_get_tiling_ioctl(struct drm_device *dev, void *data,
|
|
|
|
struct drm_file *filp);
|
2009-06-05 20:42:42 +08:00
|
|
|
|
2011-10-28 22:30:02 +08:00
|
|
|
/* VRAM scratch page for HDP bug, default vram page */
|
|
|
|
struct r600_vram_scratch {
|
2010-08-28 01:59:54 +08:00
|
|
|
struct radeon_bo *robj;
|
|
|
|
volatile uint32_t *ptr;
|
2011-10-28 22:30:02 +08:00
|
|
|
u64 gpu_addr;
|
2010-08-28 01:59:54 +08:00
|
|
|
};
|
2009-06-05 20:42:42 +08:00
|
|
|
|
2011-11-11 01:57:26 +08:00
|
|
|
|
2009-06-05 20:42:42 +08:00
|
|
|
/*
|
|
|
|
* Core structure, functions and helpers.
|
|
|
|
*/
|
|
|
|
typedef uint32_t (*radeon_rreg_t)(struct radeon_device*, uint32_t);
|
|
|
|
typedef void (*radeon_wreg_t)(struct radeon_device*, uint32_t, uint32_t);
|
|
|
|
|
|
|
|
/*
 * Core per-device driver state.  In declaration order it holds: the
 * device / DRM / PCI handles, ASIC identification and configuration, the
 * BIOS image, MMIO and I/O-port register access (including the indirect
 * MC / PLL / PCIE-port accessors used by the RREG32_* / WREG32_* macros),
 * the clock, memory-controller, GART, mode, scratch, memory-manager,
 * fence, semaphore, ring, IB-pool, IRQ, GEM and PM sub-structures, state
 * flags, surface registers, firmware handles, blitter / VRAM scratch /
 * interrupt-ring state, audio state, i2c buses, debugfs registrations,
 * the VM manager and the index of the ring used for BO copies.
 */
struct radeon_device {
|
2009-09-11 21:35:22 +08:00
|
|
|
struct device *dev;
|
2009-06-05 20:42:42 +08:00
|
|
|
struct drm_device *ddev;
|
|
|
|
struct pci_dev *pdev;
|
|
|
|
/* ASIC */
|
2009-06-17 19:28:30 +08:00
|
|
|
union radeon_asic_config config;
|
2009-06-05 20:42:42 +08:00
|
|
|
enum radeon_family family;
|
|
|
|
unsigned long flags;
|
|
|
|
int usec_timeout;
|
|
|
|
enum radeon_pll_errata pll_errata;
|
|
|
|
int num_gb_pipes;
|
2009-08-20 07:11:39 +08:00
|
|
|
int num_z_pipes;
|
2009-06-05 20:42:42 +08:00
|
|
|
int disp_priority;
|
|
|
|
/* BIOS */
|
|
|
|
uint8_t *bios;
|
|
|
|
bool is_atom_bios;
|
|
|
|
uint16_t bios_header_start;
|
2009-11-20 21:29:23 +08:00
|
|
|
struct radeon_bo *stollen_vga_memory;
|
2009-06-05 20:42:42 +08:00
|
|
|
/* Register mmio */
|
2009-06-29 16:29:12 +08:00
|
|
|
resource_size_t rmmio_base;
|
|
|
|
resource_size_t rmmio_size;
|
2011-07-13 14:28:12 +08:00
|
|
|
void __iomem *rmmio;
|
2009-06-05 20:42:42 +08:00
|
|
|
radeon_rreg_t mc_rreg;
|
|
|
|
radeon_wreg_t mc_wreg;
|
|
|
|
radeon_rreg_t pll_rreg;
|
|
|
|
radeon_wreg_t pll_wreg;
|
2009-08-12 16:43:14 +08:00
|
|
|
uint32_t pcie_reg_mask;
|
2009-06-05 20:42:42 +08:00
|
|
|
radeon_rreg_t pciep_rreg;
|
|
|
|
radeon_wreg_t pciep_wreg;
|
2010-06-30 23:52:50 +08:00
|
|
|
/* io port */
|
|
|
|
void __iomem *rio_mem;
|
|
|
|
resource_size_t rio_mem_size;
|
2009-06-05 20:42:42 +08:00
|
|
|
struct radeon_clock clock;
|
|
|
|
struct radeon_mc mc;
|
|
|
|
struct radeon_gart gart;
|
|
|
|
struct radeon_mode_info mode_info;
|
|
|
|
struct radeon_scratch scratch;
|
|
|
|
struct radeon_mman mman;
|
2011-08-26 01:39:48 +08:00
|
|
|
rwlock_t fence_lock;
|
|
|
|
struct radeon_fence_driver fence_drv[RADEON_NUM_RINGS];
|
2011-09-16 01:02:22 +08:00
|
|
|
struct radeon_semaphore_driver semaphore_drv;
|
2011-10-23 18:56:27 +08:00
|
|
|
struct radeon_ring ring[RADEON_NUM_RINGS];
|
2009-06-05 20:42:42 +08:00
|
|
|
struct radeon_ib_pool ib_pool;
|
|
|
|
struct radeon_irq irq;
|
|
|
|
struct radeon_asic *asic;
|
|
|
|
struct radeon_gem gem;
|
2009-07-14 03:04:08 +08:00
|
|
|
struct radeon_pm pm;
|
2009-09-15 10:21:01 +08:00
|
|
|
uint32_t bios_scratch[RADEON_BIOS_NUM_SCRATCH];
|
2011-11-11 01:57:26 +08:00
|
|
|
struct radeon_mutex cs_mutex;
|
2009-06-05 20:42:42 +08:00
|
|
|
struct radeon_wb wb;
|
2009-09-08 08:10:24 +08:00
|
|
|
struct radeon_dummy_page dummy_page;
|
2009-06-05 20:42:42 +08:00
|
|
|
bool gpu_lockup;
|
|
|
|
bool shutdown;
|
|
|
|
bool suspend;
|
2009-07-10 20:36:26 +08:00
|
|
|
bool need_dma32;
|
2009-09-16 21:24:21 +08:00
|
|
|
bool accel_working;
|
2009-06-24 07:48:08 +08:00
|
|
|
struct radeon_surface_reg surface_regs[RADEON_GEM_MAX_SURFACES];
|
2009-09-08 08:10:24 +08:00
|
|
|
const struct firmware *me_fw; /* all family ME firmware */
|
|
|
|
const struct firmware *pfp_fw; /* r6/700 PFP firmware */
|
2009-12-02 02:43:46 +08:00
|
|
|
const struct firmware *rlc_fw; /* r6/700 RLC firmware */
|
2011-01-07 10:19:31 +08:00
|
|
|
const struct firmware *mc_fw; /* NI MC firmware */
|
2009-09-08 08:10:24 +08:00
|
|
|
struct r600_blit r600_blit;
|
2011-10-28 22:30:02 +08:00
|
|
|
struct r600_vram_scratch vram_scratch;
|
2009-10-17 00:21:24 +08:00
|
|
|
int msi_enabled; /* msi enabled */
|
2009-12-02 02:43:46 +08:00
|
|
|
struct r600_ih ih; /* r6/700 interrupt ring */
|
2009-12-05 05:56:37 +08:00
|
|
|
struct work_struct hotplug_work;
|
2010-02-02 05:02:25 +08:00
|
|
|
int num_crtc; /* number of crtcs */
|
2009-12-23 16:23:21 +08:00
|
|
|
struct mutex dc_hw_i2c_mutex; /* display controller hw i2c mutex */
|
2010-04-27 03:52:20 +08:00
|
|
|
struct mutex vram_mutex;
|
2009-10-12 05:49:13 +08:00
|
|
|
|
|
|
|
/* audio stuff */
|
2010-06-19 18:24:56 +08:00
|
|
|
bool audio_enabled;
|
2009-10-12 05:49:13 +08:00
|
|
|
struct timer_list audio_timer;
|
|
|
|
int audio_channels;
|
|
|
|
int audio_rate;
|
|
|
|
int audio_bits_per_sample;
|
|
|
|
uint8_t audio_status_bits;
|
|
|
|
uint8_t audio_category_code;
|
2010-02-01 13:38:10 +08:00
|
|
|
|
2010-05-08 03:10:16 +08:00
|
|
|
struct notifier_block acpi_nb;
|
2011-01-05 12:46:48 +08:00
|
|
|
/* only one userspace can use Hyperz features or CMASK at a time */
|
2010-07-13 09:11:11 +08:00
|
|
|
struct drm_file *hyperz_filp;
|
2011-01-05 12:46:48 +08:00
|
|
|
struct drm_file *cmask_filp;
|
2010-08-06 09:21:16 +08:00
|
|
|
/* i2c buses */
|
|
|
|
struct radeon_i2c_chan *i2c_bus[RADEON_MAX_I2C_BUS];
|
2011-10-24 20:54:54 +08:00
|
|
|
/* debugfs */
|
|
|
|
struct radeon_debugfs debugfs[RADEON_DEBUGFS_MAX_COMPONENTS];
|
|
|
|
unsigned debugfs_count;
|
drm/radeon: GPU virtual memory support v22
Virtual address space are per drm client (opener of /dev/drm).
Client are in charge of virtual address space, they need to
map bo into it by calling DRM_RADEON_GEM_VA ioctl.
First 16M of virtual address space is reserved by the kernel.
Once using 2 level page table we should be able to have a small
vram memory footprint for each pt (there would be one pt for all
gart, one for all vram and then one first level for each virtual
address space).
Plan include using the sub allocator for a common vm page table
area and using memcpy to copy vm page table in & out. Or use
a gart object and copy things in & out using dma.
v2: agd5f fixes:
- Add vram base offset for vram pages. The GPU physical address of a
vram page is FB_OFFSET + page offset. FB_OFFSET is 0 on discrete
cards and the physical bus address of the stolen memory on
integrated chips.
- VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR covers all vmid's >= 1
v3: agd5f:
- integrate with the semaphore/multi-ring stuff
v4:
- rebase on top ttm dma & multi-ring stuff
- userspace is now in charge of the address space
- no more specific cs vm ioctl, instead cs ioctl has a new
chunk
v5:
- properly handle mem == NULL case from move_notify callback
- fix the vm cleanup path
v6:
- fix update of page table to only happen on valid mem placement
v7:
- add tlb flush for each vm context
- add flags to define mapping property (readable, writeable, snooped)
- make ring id implicit from ib->fence->ring, up to each asic callback
to then do ring specific scheduling if vm ib scheduling function
v8:
- add query for ib limit and kernel reserved virtual space
- rename vm->size to max_pfn (maximum number of page)
- update gem_va ioctl to also allow unmap operation
- bump kernel version to allow userspace to query for vm support
v9:
- rebuild page table only when bind and incrementaly depending
on bo referenced by cs and that have been moved
- allow virtual address space to grow
- use sa allocator for vram page table
- return invalid when querying vm limit on non cayman GPU
- dump vm fault register on lockup
v10: agd5f:
- Move the vm schedule_ib callback to a standalone function, remove
the callback and use the existing ib_execute callback for VM IBs.
v11:
- rebase on top of lastest Linus
v12: agd5f:
- remove spurious backslash
- set IB vm_id to 0 in radeon_ib_get()
v13: agd5f:
- fix handling of RADEON_CHUNK_ID_FLAGS
v14:
- fix va destruction
- fix suspend resume
- forbid bo to have several different va in same vm
v15:
- rebase
v16:
- cleanup left over of vm init/fini
v17: agd5f:
- cs checker
v18: agd5f:
- reworks the CS ioctl to better support multiple rings and
VM. Rather than adding a new chunk id for VM, just re-use the
IB chunk id and add a new flags for VM mode. Also define additional
dwords for the flags chunk id to define the what ring we want to use
(gfx, compute, uvd, etc.) and the priority.
v19:
- fix cs fini in weird case of no ib
- semi working flush fix for ni
- rebase on top of sa allocator changes
v20: agd5f:
- further CS ioctl cleanups from Christian's comments
v21: agd5f:
- integrate CS checker improvements
v22: agd5f:
- final cleanups for release, only allow VM CS on cayman
Signed-off-by: Jerome Glisse <jglisse@redhat.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
2012-01-06 11:11:05 +08:00
|
|
|
/* virtual memory */
|
|
|
|
struct radeon_vm_manager vm_manager;
|
2012-01-06 11:11:07 +08:00
|
|
|
/* ring used for bo copies */
|
|
|
|
u32 copy_ring;
|
2009-06-05 20:42:42 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
int radeon_device_init(struct radeon_device *rdev,
|
|
|
|
struct drm_device *ddev,
|
|
|
|
struct pci_dev *pdev,
|
|
|
|
uint32_t flags);
|
|
|
|
void radeon_device_fini(struct radeon_device *rdev);
|
|
|
|
int radeon_gpu_wait_for_idle(struct radeon_device *rdev);
|
|
|
|
|
2011-10-14 07:08:42 +08:00
|
|
|
uint32_t r100_mm_rreg(struct radeon_device *rdev, uint32_t reg);
|
|
|
|
void r100_mm_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v);
|
|
|
|
u32 r100_io_rreg(struct radeon_device *rdev, u32 reg);
|
|
|
|
void r100_io_wreg(struct radeon_device *rdev, u32 reg, u32 v);
|
2010-06-30 23:52:50 +08:00
|
|
|
|
2009-11-20 21:29:23 +08:00
|
|
|
/*
|
|
|
|
* Cast helper
|
|
|
|
*/
|
|
|
|
#define to_radeon_fence(p) ((struct radeon_fence *)(p))
|
2009-06-05 20:42:42 +08:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Registers read & write functions.
|
|
|
|
*/
|
2011-07-13 14:28:12 +08:00
|
|
|
#define RREG8(reg) readb((rdev->rmmio) + (reg))
|
|
|
|
#define WREG8(reg, v) writeb(v, (rdev->rmmio) + (reg))
|
|
|
|
#define RREG16(reg) readw((rdev->rmmio) + (reg))
|
|
|
|
#define WREG16(reg, v) writew(v, (rdev->rmmio) + (reg))
|
2009-08-12 16:43:14 +08:00
|
|
|
#define RREG32(reg) r100_mm_rreg(rdev, (reg))
|
2009-09-08 08:10:24 +08:00
|
|
|
#define DREG32(reg) printk(KERN_INFO "REGISTER: " #reg " : 0x%08X\n", r100_mm_rreg(rdev, (reg)))
|
2009-08-12 16:43:14 +08:00
|
|
|
#define WREG32(reg, v) r100_mm_wreg(rdev, (reg), (v))
|
2009-06-05 20:42:42 +08:00
|
|
|
#define REG_SET(FIELD, v) (((v) << FIELD##_SHIFT) & FIELD##_MASK)
|
|
|
|
/*
 * Extract FIELD from the raw register value v: isolate the field with
 * FIELD##_MASK, then shift it down by FIELD##_SHIFT.  This is the inverse
 * of REG_SET(); the previous definition was a copy of REG_SET() and shifted
 * the value up instead of down.
 */
#define REG_GET(FIELD, v) (((v) & FIELD##_MASK) >> FIELD##_SHIFT)
|
|
|
|
#define RREG32_PLL(reg) rdev->pll_rreg(rdev, (reg))
|
|
|
|
#define WREG32_PLL(reg, v) rdev->pll_wreg(rdev, (reg), (v))
|
|
|
|
#define RREG32_MC(reg) rdev->mc_rreg(rdev, (reg))
|
|
|
|
#define WREG32_MC(reg, v) rdev->mc_wreg(rdev, (reg), (v))
|
2009-08-12 16:43:14 +08:00
|
|
|
#define RREG32_PCIE(reg) rv370_pcie_rreg(rdev, (reg))
|
|
|
|
#define WREG32_PCIE(reg, v) rv370_pcie_wreg(rdev, (reg), (v))
|
2010-02-19 04:24:28 +08:00
|
|
|
#define RREG32_PCIE_P(reg) rdev->pciep_rreg(rdev, (reg))
|
|
|
|
#define WREG32_PCIE_P(reg, v) rdev->pciep_wreg(rdev, (reg), (v))
|
2009-06-05 20:42:42 +08:00
|
|
|
/*
 * Read-modify-write of a register through RREG32()/WREG32().
 * Bits that are SET in mask are preserved from the current register value;
 * bits that are CLEAR in mask are taken from val.  In other words mask
 * selects the bits to keep, not the bits to change.
 * reg is expanded twice (once for the read, once for the write), so it
 * must not have side effects.
 */
#define WREG32_P(reg, val, mask) \
|
|
|
|
do { \
|
|
|
|
uint32_t tmp_ = RREG32(reg); \
|
|
|
|
tmp_ &= (mask); \
|
|
|
|
tmp_ |= ((val) & ~(mask)); \
|
|
|
|
WREG32(reg, tmp_); \
|
|
|
|
} while (0)
|
|
|
|
/*
 * Read-modify-write of a PLL register through RREG32_PLL()/WREG32_PLL().
 * Bits that are SET in mask are preserved from the current register value;
 * bits that are CLEAR in mask are taken from val.
 * reg is expanded twice (once for the read, once for the write), so it
 * must not have side effects.
 */
#define WREG32_PLL_P(reg, val, mask) \
|
|
|
|
do { \
|
|
|
|
uint32_t tmp_ = RREG32_PLL(reg); \
|
|
|
|
tmp_ &= (mask); \
|
|
|
|
tmp_ |= ((val) & ~(mask)); \
|
|
|
|
WREG32_PLL(reg, tmp_); \
|
|
|
|
} while (0)
|
2009-09-08 08:10:24 +08:00
|
|
|
#define DREG32_SYS(sqf, rdev, reg) seq_printf((sqf), #reg " : 0x%08X\n", r100_mm_rreg((rdev), (reg)))
|
2010-06-30 23:52:50 +08:00
|
|
|
#define RREG32_IO(reg) r100_io_rreg(rdev, (reg))
|
|
|
|
#define WREG32_IO(reg, v) r100_io_wreg(rdev, (reg), (v))
|
2009-06-05 20:42:42 +08:00
|
|
|
|
2009-08-12 16:43:14 +08:00
|
|
|
/*
|
|
|
|
* Indirect registers accessor
|
|
|
|
*/
|
|
|
|
static inline uint32_t rv370_pcie_rreg(struct radeon_device *rdev, uint32_t reg)
|
|
|
|
{
|
|
|
|
uint32_t r;
|
|
|
|
|
|
|
|
WREG32(RADEON_PCIE_INDEX, ((reg) & rdev->pcie_reg_mask));
|
|
|
|
r = RREG32(RADEON_PCIE_DATA);
|
|
|
|
return r;
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline void rv370_pcie_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v)
|
|
|
|
{
|
|
|
|
WREG32(RADEON_PCIE_INDEX, ((reg) & rdev->pcie_reg_mask));
|
|
|
|
WREG32(RADEON_PCIE_DATA, (v));
|
|
|
|
}
|
|
|
|
|
2009-06-05 20:42:42 +08:00
|
|
|
void r100_pll_errata_after_index(struct radeon_device *rdev);
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
* ASICs helpers.
|
|
|
|
*/
|
2009-07-14 00:02:32 +08:00
|
|
|
/*
 * True when the PCI device id is 0x515e or 0x5969.
 * rdev is parenthesized so that any pointer expression may be passed.
 */
#define ASIC_IS_RN50(rdev) (((rdev)->pdev->device == 0x515e) || \
			    ((rdev)->pdev->device == 0x5969))
|
2009-06-05 20:42:42 +08:00
|
|
|
/*
 * True when rdev->family is one of RV100, RV200, RS100, RS200, RV250,
 * RV280 or RS300.
 * rdev is parenthesized so that any pointer expression may be passed.
 */
#define ASIC_IS_RV100(rdev) (((rdev)->family == CHIP_RV100) || \
			     ((rdev)->family == CHIP_RV200) || \
			     ((rdev)->family == CHIP_RS100) || \
			     ((rdev)->family == CHIP_RS200) || \
			     ((rdev)->family == CHIP_RV250) || \
			     ((rdev)->family == CHIP_RV280) || \
			     ((rdev)->family == CHIP_RS300))
|
|
|
|
/*
 * True when rdev->family is one of R300, RV350, R350, RV380, R420, R423,
 * RV410, RS400 or RS480.
 * rdev is parenthesized so that any pointer expression may be passed.
 */
#define ASIC_IS_R300(rdev) (((rdev)->family == CHIP_R300) || \
			    ((rdev)->family == CHIP_RV350) || \
			    ((rdev)->family == CHIP_R350) || \
			    ((rdev)->family == CHIP_RV380) || \
			    ((rdev)->family == CHIP_R420) || \
			    ((rdev)->family == CHIP_R423) || \
			    ((rdev)->family == CHIP_RV410) || \
			    ((rdev)->family == CHIP_RS400) || \
			    ((rdev)->family == CHIP_RS480))
|
2011-01-07 07:49:34 +08:00
|
|
|
/*
 * True when the PCI device id (read through rdev->ddev->pdev) is one of
 * the ids listed below.
 * rdev is parenthesized so that any pointer expression may be passed.
 */
#define ASIC_IS_X2(rdev) (((rdev)->ddev->pdev->device == 0x9441) || \
			  ((rdev)->ddev->pdev->device == 0x9443) || \
			  ((rdev)->ddev->pdev->device == 0x944B) || \
			  ((rdev)->ddev->pdev->device == 0x9506) || \
			  ((rdev)->ddev->pdev->device == 0x9509) || \
			  ((rdev)->ddev->pdev->device == 0x950F) || \
			  ((rdev)->ddev->pdev->device == 0x689C) || \
			  ((rdev)->ddev->pdev->device == 0x689D))
|
2009-06-05 20:42:42 +08:00
|
|
|
/*
 * Display-engine generation tests, based on rdev->family (and, for DCE4.1,
 * on the RADEON_IS_IGP bit in rdev->flags). Most of them are ">=" checks,
 * so they depend on the ordering of the family enumeration.
 *
 * @rdev is parenthesised before dereferencing so any pointer-valued
 * expression may be passed. It may be evaluated more than once, so it
 * must have no side effects.
 */
#define ASIC_IS_AVIVO(rdev) (((rdev)->family >= CHIP_RS600))
#define ASIC_IS_DCE2(rdev) (((rdev)->family == CHIP_RS600) || \
			    ((rdev)->family == CHIP_RS690) || \
			    ((rdev)->family == CHIP_RS740) || \
			    ((rdev)->family >= CHIP_R600))
#define ASIC_IS_DCE3(rdev) (((rdev)->family >= CHIP_RV620))
#define ASIC_IS_DCE32(rdev) (((rdev)->family >= CHIP_RV730))
#define ASIC_IS_DCE4(rdev) (((rdev)->family >= CHIP_CEDAR))
#define ASIC_IS_DCE41(rdev) (((rdev)->family >= CHIP_PALM) && \
			     ((rdev)->flags & RADEON_IS_IGP))
#define ASIC_IS_DCE5(rdev) (((rdev)->family >= CHIP_BARTS))
|
2009-06-05 20:42:42 +08:00
|
|
|
|
|
|
|
/*
|
|
|
|
* BIOS helpers.
|
|
|
|
*/
|
|
|
|
/*
 * Little-endian accessors over the byte array rdev->bios.
 *
 * These macros do not take the device as a parameter: a variable named
 * `rdev` with a `bios` member must be in scope wherever they are used.
 * RBIOS16 combines two consecutive bytes and RBIOS32 combines two
 * consecutive 16-bit values, low part first. @i is evaluated more than
 * once by RBIOS16/RBIOS32.
 */
#define RBIOS8(i)	(rdev->bios[i])
#define RBIOS16(i)	((RBIOS8((i)+1) << 8) | RBIOS8(i))
#define RBIOS32(i)	((RBIOS16((i)+2) << 16) | (RBIOS16(i)))
|
|
|
|
|
|
|
|
/*
 * Init/fini entry points for the two BIOS flavours (combios and atombios).
 * Only the prototypes live here; the init variants return an int, the
 * fini variants return nothing.
 */
int  radeon_combios_init(struct radeon_device *rdev);
void radeon_combios_fini(struct radeon_device *rdev);
int  radeon_atombios_init(struct radeon_device *rdev);
void radeon_atombios_fini(struct radeon_device *rdev);
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
* RING helpers.
|
|
|
|
*/
|
2011-10-14 07:08:47 +08:00
|
|
|
#if DRM_DEBUG_CODE == 0
|
2011-10-23 18:56:27 +08:00
|
|
|
static inline void radeon_ring_write(struct radeon_ring *ring, uint32_t v)
|
2009-06-05 20:42:42 +08:00
|
|
|
{
|
2011-10-23 18:56:27 +08:00
|
|
|
ring->ring[ring->wptr++] = v;
|
|
|
|
ring->wptr &= ring->ptr_mask;
|
|
|
|
ring->count_dw--;
|
|
|
|
ring->ring_free_dw--;
|
2009-06-05 20:42:42 +08:00
|
|
|
}
|
2011-10-14 07:08:47 +08:00
|
|
|
#else
|
|
|
|
/* With debugging this is just too big to inline */
|
2011-10-23 18:56:27 +08:00
|
|
|
void radeon_ring_write(struct radeon_ring *ring, uint32_t v);
|
2011-10-14 07:08:47 +08:00
|
|
|
#endif
|
2009-06-05 20:42:42 +08:00
|
|
|
|
|
|
|
/*
|
|
|
|
* ASICs macro.
|
|
|
|
*/
|
2009-06-17 19:28:30 +08:00
|
|
|
/*
 * Device lifecycle dispatch: each macro calls the same-named callback in
 * the per-ASIC table rdev->asic, passing rdev. @rdev is evaluated twice.
 */
#define radeon_init(rdev)	((rdev)->asic->init(rdev))
#define radeon_fini(rdev)	((rdev)->asic->fini(rdev))
#define radeon_resume(rdev)	((rdev)->asic->resume(rdev))
#define radeon_suspend(rdev)	((rdev)->asic->suspend(rdev))
|
2009-06-05 20:42:42 +08:00
|
|
|
/*
 * Dispatch to the per-ASIC cs_parse callback with @p.
 * NOTE(review): the expansion references `rdev`, which is NOT a macro
 * parameter - a variable with that name must be in scope at every use
 * site. Unlike the sibling dispatch macros, the device cannot be chosen
 * by the caller through an argument.
 */
#define radeon_cs_parse(p) rdev->asic->cs_parse((p))
|
2009-09-21 12:33:58 +08:00
|
|
|
/*
 * Per-ASIC dispatch helpers: each macro forwards its arguments to the
 * callback of the matching name in rdev->asic (or in rdev->asic->ring[r]
 * for the per-ring variant). @rdev is evaluated twice in every macro.
 */
#define radeon_vga_set_state(rdev, state)	((rdev)->asic->vga_set_state(rdev, state))
#define radeon_gpu_is_lockup(rdev, cp)		((rdev)->asic->gpu_is_lockup(rdev, cp))
#define radeon_asic_reset(rdev)			((rdev)->asic->asic_reset(rdev))
#define radeon_gart_tlb_flush(rdev)		((rdev)->asic->gart_tlb_flush(rdev))
#define radeon_gart_set_page(rdev, i, p)	((rdev)->asic->gart_set_page(rdev, i, p))
#define radeon_ring_start(rdev)			((rdev)->asic->ring_start(rdev))
#define radeon_ring_test(rdev, cp)		((rdev)->asic->ring_test(rdev, cp))
#define radeon_ring_ib_execute(rdev, r, ib)	((rdev)->asic->ring[r].ib_execute(rdev, ib))
|
drm/radeon: GPU virtual memory support v22
Virtual address space are per drm client (opener of /dev/drm).
Client are in charge of virtual address space, they need to
map bo into it by calling DRM_RADEON_GEM_VA ioctl.
First 16M of virtual address space is reserved by the kernel.
Once using 2 level page table we should be able to have a small
vram memory footprint for each pt (there would be one pt for all
gart, one for all vram and then one first level for each virtual
address space).
Plan include using the sub allocator for a common vm page table
area and using memcpy to copy vm page table in & out. Or use
a gart object and copy things in & out using dma.
v2: agd5f fixes:
- Add vram base offset for vram pages. The GPU physical address of a
vram page is FB_OFFSET + page offset. FB_OFFSET is 0 on discrete
cards and the physical bus address of the stolen memory on
integrated chips.
- VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR covers all vmid's >= 1
v3: agd5f:
- integrate with the semaphore/multi-ring stuff
v4:
- rebase on top ttm dma & multi-ring stuff
- userspace is now in charge of the address space
- no more specific cs vm ioctl, instead cs ioctl has a new
chunk
v5:
- properly handle mem == NULL case from move_notify callback
- fix the vm cleanup path
v6:
- fix update of page table to only happen on valid mem placement
v7:
- add tlb flush for each vm context
- add flags to define mapping property (readable, writeable, snooped)
- make ring id implicit from ib->fence->ring, up to each asic callback
to then do ring specific scheduling if vm ib scheduling function
v8:
- add query for ib limit and kernel reserved virtual space
- rename vm->size to max_pfn (maximum number of page)
- update gem_va ioctl to also allow unmap operation
- bump kernel version to allow userspace to query for vm support
v9:
- rebuild page table only when bind and incrementally depending
on bo referenced by cs and that have been moved
- allow virtual address space to grow
- use sa allocator for vram page table
- return invalid when querying vm limit on non cayman GPU
- dump vm fault register on lockup
v10: agd5f:
- Move the vm schedule_ib callback to a standalone function, remove
the callback and use the existing ib_execute callback for VM IBs.
v11:
- rebase on top of latest Linus
v12: agd5f:
- remove spurious backslash
- set IB vm_id to 0 in radeon_ib_get()
v13: agd5f:
- fix handling of RADEON_CHUNK_ID_FLAGS
v14:
- fix va destruction
- fix suspend resume
- forbid bo to have several different va in same vm
v15:
- rebase
v16:
- cleanup left over of vm init/fini
v17: agd5f:
- cs checker
v18: agd5f:
- reworks the CS ioctl to better support multiple rings and
VM. Rather than adding a new chunk id for VM, just re-use the
IB chunk id and add a new flags for VM mode. Also define additional
dwords for the flags chunk id to define the what ring we want to use
(gfx, compute, uvd, etc.) and the priority.
v19:
- fix cs fini in weird case of no ib
- semi working flush fix for ni
- rebase on top of sa allocator changes
v20: agd5f:
- further CS ioctl cleanups from Christian's comments
v21: agd5f:
- integrate CS checker improvements
v22: agd5f:
- final cleanups for release, only allow VM CS on cayman
Signed-off-by: Jerome Glisse <jglisse@redhat.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
2012-01-06 11:11:05 +08:00
|
|
|
/*
 * IB parsing, interrupt, vblank, fence and semaphore dispatch.
 *
 * The *_ring_* variants index rdev->asic->ring[] with @r and call the
 * per-ring callback; the others call the callback directly in rdev->asic.
 * @rdev is evaluated twice in every macro.
 */
#define radeon_ring_ib_parse(rdev, r, ib)	((rdev)->asic->ring[r].ib_parse(rdev, ib))
#define radeon_irq_set(rdev)			((rdev)->asic->irq_set(rdev))
#define radeon_irq_process(rdev)		((rdev)->asic->irq_process(rdev))
#define radeon_get_vblank_counter(rdev, crtc)	((rdev)->asic->get_vblank_counter(rdev, crtc))
#define radeon_fence_ring_emit(rdev, r, fence)	((rdev)->asic->ring[r].emit_fence(rdev, fence))
#define radeon_semaphore_ring_emit(rdev, r, cp, semaphore, emit_wait) \
	((rdev)->asic->ring[r].emit_semaphore(rdev, cp, semaphore, emit_wait))
|
2009-06-05 20:42:42 +08:00
|
|
|
/*
 * Copy, clock, PCIE-lane, clock-gating and surface-register dispatch.
 * Each macro forwards its arguments unchanged to the callback of the
 * matching name in rdev->asic. @rdev is evaluated twice in every macro.
 */
#define radeon_copy_blit(rdev, s, d, np, f)	((rdev)->asic->copy_blit(rdev, s, d, np, f))
#define radeon_copy_dma(rdev, s, d, np, f)	((rdev)->asic->copy_dma(rdev, s, d, np, f))
#define radeon_copy(rdev, s, d, np, f)		((rdev)->asic->copy(rdev, s, d, np, f))
#define radeon_get_engine_clock(rdev)		((rdev)->asic->get_engine_clock(rdev))
#define radeon_set_engine_clock(rdev, e)	((rdev)->asic->set_engine_clock(rdev, e))
#define radeon_get_memory_clock(rdev)		((rdev)->asic->get_memory_clock(rdev))
#define radeon_set_memory_clock(rdev, e)	((rdev)->asic->set_memory_clock(rdev, e))
#define radeon_get_pcie_lanes(rdev)		((rdev)->asic->get_pcie_lanes(rdev))
#define radeon_set_pcie_lanes(rdev, l)		((rdev)->asic->set_pcie_lanes(rdev, l))
#define radeon_set_clock_gating(rdev, e)	((rdev)->asic->set_clock_gating(rdev, e))
#define radeon_set_surface_reg(rdev, r, f, p, o, s) \
	((rdev)->asic->set_surface_reg(rdev, r, f, p, o, s))
#define radeon_clear_surface_reg(rdev, r)	((rdev)->asic->clear_surface_reg(rdev, r))
|
2009-07-14 03:04:08 +08:00
|
|
|
/*
 * Bandwidth, hot-plug-detect (hpd), GUI-idle and power-management (pm)
 * dispatch. Each macro forwards its arguments unchanged to the callback
 * of the matching name in rdev->asic. @rdev is evaluated twice.
 */
#define radeon_bandwidth_update(rdev)		((rdev)->asic->bandwidth_update(rdev))
#define radeon_hpd_init(rdev)			((rdev)->asic->hpd_init(rdev))
#define radeon_hpd_fini(rdev)			((rdev)->asic->hpd_fini(rdev))
#define radeon_hpd_sense(rdev, hpd)		((rdev)->asic->hpd_sense(rdev, hpd))
#define radeon_hpd_set_polarity(rdev, hpd)	((rdev)->asic->hpd_set_polarity(rdev, hpd))
#define radeon_gui_idle(rdev)			((rdev)->asic->gui_idle(rdev))
#define radeon_pm_misc(rdev)			((rdev)->asic->pm_misc(rdev))
#define radeon_pm_prepare(rdev)			((rdev)->asic->pm_prepare(rdev))
#define radeon_pm_finish(rdev)			((rdev)->asic->pm_finish(rdev))
#define radeon_pm_init_profile(rdev)		((rdev)->asic->pm_init_profile(rdev))
#define radeon_pm_get_dynpm_state(rdev)		((rdev)->asic->pm_get_dynpm_state(rdev))
|
2010-11-21 23:59:01 +08:00
|
|
|
/*
 * Page-flip dispatch through the per-ASIC callback table rdev->asic.
 *
 * @rdev is parenthesised before being dereferenced, matching the other
 * dispatch macros in this header, so any pointer-valued expression (for
 * example &dev) expands correctly. @rdev is evaluated twice.
 */
#define radeon_pre_page_flip(rdev, crtc) (rdev)->asic->pre_page_flip((rdev), (crtc))
#define radeon_page_flip(rdev, crtc, base) (rdev)->asic->page_flip((rdev), (crtc), (base))
#define radeon_post_page_flip(rdev, crtc) (rdev)->asic->post_page_flip((rdev), (crtc))
|
2009-06-05 20:42:42 +08:00
|
|
|
|
2009-09-11 03:46:48 +08:00
|
|
|
/* Common functions */
|
2010-01-13 22:16:38 +08:00
|
|
|
/* AGP */
|
2010-03-09 22:45:12 +08:00
|
|
|
extern int radeon_gpu_reset(struct radeon_device *rdev);
|
2010-01-13 22:16:38 +08:00
|
|
|
extern void radeon_agp_disable(struct radeon_device *rdev);
|
2009-09-11 21:55:33 +08:00
|
|
|
extern int radeon_modeset_init(struct radeon_device *rdev);
|
|
|
|
extern void radeon_modeset_fini(struct radeon_device *rdev);
|
2009-09-11 21:35:22 +08:00
|
|
|
extern bool radeon_card_posted(struct radeon_device *rdev);
|
2010-03-17 08:54:38 +08:00
|
|
|
extern void radeon_update_bandwidth_info(struct radeon_device *rdev);
|
2010-03-31 12:33:27 +08:00
|
|
|
extern void radeon_update_display_priority(struct radeon_device *rdev);
|
2009-12-01 12:06:31 +08:00
|
|
|
extern bool radeon_boot_test_post_card(struct radeon_device *rdev);
|
2009-09-11 21:55:33 +08:00
|
|
|
extern void radeon_scratch_init(struct radeon_device *rdev);
|
2010-08-28 06:25:25 +08:00
|
|
|
extern void radeon_wb_fini(struct radeon_device *rdev);
|
|
|
|
extern int radeon_wb_init(struct radeon_device *rdev);
|
|
|
|
extern void radeon_wb_disable(struct radeon_device *rdev);
|
2009-09-11 21:55:33 +08:00
|
|
|
extern void radeon_surface_init(struct radeon_device *rdev);
|
|
|
|
extern int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data);
|
2009-10-01 16:20:52 +08:00
|
|
|
extern void radeon_legacy_set_clock_gating(struct radeon_device *rdev, int enable);
|
2009-09-29 00:34:43 +08:00
|
|
|
extern void radeon_atom_set_clock_gating(struct radeon_device *rdev, int enable);
|
2009-12-07 22:52:58 +08:00
|
|
|
extern void radeon_ttm_placement_from_domain(struct radeon_bo *rbo, u32 domain);
|
2009-12-15 04:02:09 +08:00
|
|
|
extern bool radeon_ttm_bo_is_radeon_bo(struct ttm_buffer_object *bo);
|
drm/radeon/kms: simplify memory controller setup V2
Get rid of _location and use _start/_end also simplify the
computation of vram_start|end & gtt_start|end. For R1XX-R2XX
we place VRAM at the same address of PCI aperture, those GPU
shouldn't have much memory and seems to behave better when
setup that way. For R3XX and newer we place VRAM at 0. For
R6XX-R7XX AGP we place VRAM before or after AGP aperture this
might limit the VRAM size but it's very unlikely.
For IGP we don't change the VRAM placement.
Tested on (compiz,quake3,suspend/resume):
PCI/PCIE:RV280,R420,RV515,RV570,RV610,RV710
AGP:RV100,RV280,R420,RV350,RV620(RPB*),RV730
IGP:RS480(RPB*),RS690,RS780(RPB*),RS880
RPB: resume previously broken
V2 correct commit message to reflect more accurately the bug
and move VRAM placement to 0 for most of the GPU to avoid
limiting VRAM.
Signed-off-by: Jerome Glisse <jglisse@redhat.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
2010-02-18 05:54:29 +08:00
|
|
|
extern void radeon_vram_location(struct radeon_device *rdev, struct radeon_mc *mc, u64 base);
|
|
|
|
extern void radeon_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
|
2010-02-01 13:38:10 +08:00
|
|
|
extern int radeon_resume_kms(struct drm_device *dev);
|
|
|
|
extern int radeon_suspend_kms(struct drm_device *dev, pm_message_t state);
|
2011-03-14 07:47:24 +08:00
|
|
|
extern void radeon_ttm_set_active_vram_size(struct radeon_device *rdev, u64 size);
|
2009-09-11 03:46:48 +08:00
|
|
|
|
drm/radeon: GPU virtual memory support v22
Virtual address space are per drm client (opener of /dev/drm).
Client are in charge of virtual address space, they need to
map bo into it by calling DRM_RADEON_GEM_VA ioctl.
First 16M of virtual address space is reserved by the kernel.
Once using 2 level page table we should be able to have a small
vram memory footprint for each pt (there would be one pt for all
gart, one for all vram and then one first level for each virtual
address space).
Plan include using the sub allocator for a common vm page table
area and using memcpy to copy vm page table in & out. Or use
a gart object and copy things in & out using dma.
v2: agd5f fixes:
- Add vram base offset for vram pages. The GPU physical address of a
vram page is FB_OFFSET + page offset. FB_OFFSET is 0 on discrete
cards and the physical bus address of the stolen memory on
integrated chips.
- VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR covers all vmid's >= 1
v3: agd5f:
- integrate with the semaphore/multi-ring stuff
v4:
- rebase on top ttm dma & multi-ring stuff
- userspace is now in charge of the address space
- no more specific cs vm ioctl, instead cs ioctl has a new
chunk
v5:
- properly handle mem == NULL case from move_notify callback
- fix the vm cleanup path
v6:
- fix update of page table to only happen on valid mem placement
v7:
- add tlb flush for each vm context
- add flags to define mapping property (readable, writeable, snooped)
- make ring id implicit from ib->fence->ring, up to each asic callback
to then do ring specific scheduling if vm ib scheduling function
v8:
- add query for ib limit and kernel reserved virtual space
- rename vm->size to max_pfn (maximum number of page)
- update gem_va ioctl to also allow unmap operation
- bump kernel version to allow userspace to query for vm support
v9:
- rebuild page table only when bind and incrementally depending
on bo referenced by cs and that have been moved
- allow virtual address space to grow
- use sa allocator for vram page table
- return invalid when querying vm limit on non cayman GPU
- dump vm fault register on lockup
v10: agd5f:
- Move the vm schedule_ib callback to a standalone function, remove
the callback and use the existing ib_execute callback for VM IBs.
v11:
- rebase on top of latest Linus
v12: agd5f:
- remove spurious backslash
- set IB vm_id to 0 in radeon_ib_get()
v13: agd5f:
- fix handling of RADEON_CHUNK_ID_FLAGS
v14:
- fix va destruction
- fix suspend resume
- forbid bo to have several different va in same vm
v15:
- rebase
v16:
- cleanup left over of vm init/fini
v17: agd5f:
- cs checker
v18: agd5f:
- reworks the CS ioctl to better support multiple rings and
VM. Rather than adding a new chunk id for VM, just re-use the
IB chunk id and add a new flags for VM mode. Also define additional
dwords for the flags chunk id to define the what ring we want to use
(gfx, compute, uvd, etc.) and the priority.
v19:
- fix cs fini in weird case of no ib
- semi working flush fix for ni
- rebase on top of sa allocator changes
v20: agd5f:
- further CS ioctl cleanups from Christian's comments
v21: agd5f:
- integrate CS checker improvements
v22: agd5f:
- final cleanups for release, only allow VM CS on cayman
Signed-off-by: Jerome Glisse <jglisse@redhat.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
2012-01-06 11:11:05 +08:00
|
|
|
/*
 * vm
 *
 * Prototypes for the VM manager (init/fini/start/suspend), for individual
 * VMs (init/fini/bind/unbind) and for buffer objects attached to a VM
 * (add/rmv/update_pte/invalidate).
 */
int  radeon_vm_manager_init(struct radeon_device *rdev);
void radeon_vm_manager_fini(struct radeon_device *rdev);
int  radeon_vm_manager_start(struct radeon_device *rdev);
int  radeon_vm_manager_suspend(struct radeon_device *rdev);

int  radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm);
void radeon_vm_fini(struct radeon_device *rdev, struct radeon_vm *vm);
int  radeon_vm_bind(struct radeon_device *rdev, struct radeon_vm *vm);
void radeon_vm_unbind(struct radeon_device *rdev, struct radeon_vm *vm);

int  radeon_vm_bo_update_pte(struct radeon_device *rdev, struct radeon_vm *vm,
			     struct radeon_bo *bo, struct ttm_mem_reg *mem);
void radeon_vm_bo_invalidate(struct radeon_device *rdev, struct radeon_bo *bo);
int  radeon_vm_bo_add(struct radeon_device *rdev, struct radeon_vm *vm,
		      struct radeon_bo *bo, uint64_t offset, uint32_t flags);
int  radeon_vm_bo_rmv(struct radeon_device *rdev, struct radeon_vm *vm,
		      struct radeon_bo *bo);
|
|
|
|
|
|
|
|
|
2011-10-28 22:30:02 +08:00
|
|
|
/* R600 vram scratch functions: init returns an int, fini returns nothing. */
int  r600_vram_scratch_init(struct radeon_device *rdev);
void r600_vram_scratch_fini(struct radeon_device *rdev);
|
|
|
|
|
2011-02-19 00:59:19 +08:00
|
|
|
/*
 * r600 functions used by radeon_encoder.c
 *
 * All three operate on a drm_encoder; setmode additionally takes the
 * display mode.
 */
void r600_hdmi_enable(struct drm_encoder *encoder);
void r600_hdmi_disable(struct drm_encoder *encoder);
void r600_hdmi_setmode(struct drm_encoder *encoder, struct drm_display_mode *mode);
|
2010-03-25 01:36:43 +08:00
|
|
|
|
2011-01-07 10:19:31 +08:00
|
|
|
/* NI microcode entry points; both return an int. */
int ni_init_microcode(struct radeon_device *rdev);
int ni_mc_load_microcode(struct radeon_device *rdev);
|
2011-01-07 10:19:31 +08:00
|
|
|
|
2010-07-06 23:40:24 +08:00
|
|
|
/* radeon_acpi.c */
#ifdef CONFIG_ACPI
int radeon_acpi_init(struct radeon_device *rdev);
#else
/* Without CONFIG_ACPI the hook is a stub that ignores rdev and returns 0. */
static inline int radeon_acpi_init(struct radeon_device *rdev)
{
	return 0;
}
#endif
|
|
|
|
|
2009-11-20 21:29:23 +08:00
|
|
|
#include "radeon_object.h"
|
|
|
|
|
2009-06-05 20:42:42 +08:00
|
|
|
#endif
|