Pixelflinger: Support for handling 64-bit addresses in GGL Assembler

GGLAssembler assumes addresses to be 32-bit and uses ARM 32-bit
instructions to load/store/manipulate addresses. To support 64-bit
architectures, the following changes have been made:

1. ARMAssemblerInterface has been extended to support four new
   operations: ADDR_LDR, ADDR_STR, ADDR_SUB and ADDR_ADD. The base
   class implements these virtual functions using the equivalent
   32-bit functions. This saves existing 32-bit assembler backend
   implementations such as ARMAssembler and MIPSAssembler from
   having to map the new functions to their existing equivalent
   routines. It also allows 64-bit architectures such as AArch64 to
   override the functions in their assembler backend implementations.

2. GGLAssembler code (spread over GGLAssembler.cpp, GGLAssembler.h
   and texturing.cpp) has been changed to use the new operations
   for address operations.

Change-Id: I3d7eace4691e3e47cef737d97ac67ce6ef4fb18d
Signed-off-by: Ashok Bhat <ashok.bhat@arm.com>
This commit is contained in:
Ashok Bhat 2013-02-21 10:27:40 +00:00 committed by David Butcher
parent d10afb1748
commit bfc6dc4ca8
7 changed files with 80 additions and 19 deletions

View File

@ -61,6 +61,29 @@ uint32_t ARMAssemblerInterface::__immed8_pre(int32_t immed8, int W)
((W&1)<<21) | (((offset&0xF0)<<4)|(offset&0xF));
}
// The following four functions are required for address manipulation.
// They are virtual functions, which can be overridden by architectures
// that need special handling of address values (e.g. 64-bit architectures).
// ADDR_LDR: address-load hook. This base-class version simply forwards all
// of its arguments, unchanged, to LDR.
void ARMAssemblerInterface::ADDR_LDR(int cc, int Rd, int Rn, uint32_t offset)
{
    LDR(cc, Rd, Rn, offset);
}
// ADDR_STR: address-store hook. This base-class version simply forwards all
// of its arguments, unchanged, to STR.
void ARMAssemblerInterface::ADDR_STR(int cc, int Rd, int Rn, uint32_t offset)
{
    STR(cc, Rd, Rn, offset);
}
// ADDR_ADD: address-add hook. This base-class version issues a
// dataProcessing() call with opcode opADD, passing cc, s, Rd, Rn and Op2
// through unchanged.
void ARMAssemblerInterface::ADDR_ADD(int cc, int s, int Rd, int Rn, uint32_t Op2)
{
    dataProcessing(opADD, cc, s, Rd, Rn, Op2);
}
// ADDR_SUB: address-subtract hook. This base-class version issues a
// dataProcessing() call with opcode opSUB, passing cc, s, Rd, Rn and Op2
// through unchanged.
void ARMAssemblerInterface::ADDR_SUB(int cc, int s, int Rd, int Rn, uint32_t Op2)
{
    dataProcessing(opSUB, cc, s, Rd, Rn, Op2);
}
}; // namespace android

View File

@ -331,6 +331,16 @@ public:
inline void
SMLAWT(int cc, int Rd, int Rm, int Rs, int Rn) {
SMLAW(cc, yT, Rd, Rm, Rs, Rn); }
// Address loading/storing/manipulation
// These four operations are declared virtual so that a derived assembler
// can supply its own handling of address values.
// ADDR_LDR / ADDR_STR: 'offset' defaults to __immed12_pre(0) when omitted.
virtual void ADDR_LDR(int cc, int Rd,
int Rn, uint32_t offset = __immed12_pre(0));
virtual void ADDR_STR (int cc, int Rd,
int Rn, uint32_t offset = __immed12_pre(0));
// ADDR_ADD / ADDR_SUB: no default arguments; every operand must be given.
virtual void ADDR_ADD(int cc, int s, int Rd,
int Rn, uint32_t Op2);
virtual void ADDR_SUB(int cc, int s, int Rd,
int Rn, uint32_t Op2);
};
}; // namespace android

View File

@ -294,5 +294,18 @@ void ARMAssemblerProxy::UBFX(int cc, int Rd, int Rn, int lsb, int width) {
mTarget->UBFX(cc, Rd, Rn, lsb, width);
}
// Proxy pass-through: hands the ADDR_LDR request to mTarget unchanged.
void ARMAssemblerProxy::ADDR_LDR(int cc, int Rd, int Rn, uint32_t offset)
{
    mTarget->ADDR_LDR(cc, Rd, Rn, offset);
}
// Proxy pass-through: hands the ADDR_STR request to mTarget unchanged.
void ARMAssemblerProxy::ADDR_STR(int cc, int Rd, int Rn, uint32_t offset)
{
    mTarget->ADDR_STR(cc, Rd, Rn, offset);
}
// Proxy pass-through: hands the ADDR_ADD request to mTarget unchanged.
void ARMAssemblerProxy::ADDR_ADD(int cc, int s, int Rd, int Rn, uint32_t Op2)
{
    mTarget->ADDR_ADD(cc, s, Rd, Rn, Op2);
}
// Proxy pass-through: hands the ADDR_SUB request to mTarget unchanged.
void ARMAssemblerProxy::ADDR_SUB(int cc, int s, int Rd, int Rn, uint32_t Op2)
{
    mTarget->ADDR_SUB(cc, s, Rd, Rn, Op2);
}
}; // namespace android

View File

@ -146,6 +146,15 @@ public:
virtual void UXTB16(int cc, int Rd, int Rm, int rotate);
virtual void UBFX(int cc, int Rd, int Rn, int lsb, int width);
// Address load/store/add/subtract entry points of the proxy. Their
// definitions pass each call straight through to mTarget.
// NOTE(review): presumably these override the same-named virtuals of the
// assembler interface this proxy derives from -- confirm against the
// class header, which is not visible here.
// 'offset' defaults to __immed12_pre(0) for the load and store variants.
virtual void ADDR_LDR(int cc, int Rd,
int Rn, uint32_t offset = __immed12_pre(0));
virtual void ADDR_STR (int cc, int Rd,
int Rn, uint32_t offset = __immed12_pre(0));
virtual void ADDR_ADD(int cc, int s, int Rd,
int Rn, uint32_t Op2);
virtual void ADDR_SUB(int cc, int s, int Rd,
int Rn, uint32_t Op2);
private:
ARMAssemblerInterface* mTarget;
};

View File

@ -263,7 +263,7 @@ int GGLAssembler::scanline_core(const needs_t& needs, context_t const* c)
const int mask = GGL_DITHER_SIZE-1;
parts.dither = reg_t(regs.obtain());
AND(AL, 0, parts.dither.reg, parts.count.reg, imm(mask));
ADD(AL, 0, parts.dither.reg, parts.dither.reg, ctxtReg);
ADDR_ADD(AL, 0, parts.dither.reg, ctxtReg, parts.dither.reg);
LDRB(AL, parts.dither.reg, parts.dither.reg,
immed12_pre(GGL_OFFSETOF(ditherMatrix)));
}
@ -336,7 +336,7 @@ int GGLAssembler::scanline_core(const needs_t& needs, context_t const* c)
build_iterate_z(parts);
build_iterate_f(parts);
if (!mAllMasked) {
ADD(AL, 0, parts.cbPtr.reg, parts.cbPtr.reg, imm(parts.cbPtr.size>>3));
ADDR_ADD(AL, 0, parts.cbPtr.reg, parts.cbPtr.reg, imm(parts.cbPtr.size>>3));
}
SUB(AL, S, parts.count.reg, parts.count.reg, imm(1<<16));
B(PL, "fragment_loop");
@ -392,7 +392,7 @@ void GGLAssembler::build_scanline_prolog(
int Rs = scratches.obtain();
parts.cbPtr.setTo(obtainReg(), cb_bits);
CONTEXT_LOAD(Rs, state.buffers.color.stride);
CONTEXT_LOAD(parts.cbPtr.reg, state.buffers.color.data);
CONTEXT_ADDR_LOAD(parts.cbPtr.reg, state.buffers.color.data);
SMLABB(AL, Rs, Ry, Rs, Rx); // Rs = Rx + Ry*Rs
base_offset(parts.cbPtr, parts.cbPtr, Rs);
scratches.recycle(Rs);
@ -428,11 +428,11 @@ void GGLAssembler::build_scanline_prolog(
int Rs = dzdx;
int zbase = scratches.obtain();
CONTEXT_LOAD(Rs, state.buffers.depth.stride);
CONTEXT_LOAD(zbase, state.buffers.depth.data);
CONTEXT_ADDR_LOAD(zbase, state.buffers.depth.data);
SMLABB(AL, Rs, Ry, Rs, Rx);
ADD(AL, 0, Rs, Rs, reg_imm(parts.count.reg, LSR, 16));
ADD(AL, 0, zbase, zbase, reg_imm(Rs, LSL, 1));
CONTEXT_STORE(zbase, generated_vars.zbase);
ADDR_ADD(AL, 0, zbase, zbase, reg_imm(Rs, LSL, 1));
CONTEXT_ADDR_STORE(zbase, generated_vars.zbase);
}
// init texture coordinates
@ -445,8 +445,8 @@ void GGLAssembler::build_scanline_prolog(
// init coverage factor application (anti-aliasing)
if (mAA) {
parts.covPtr.setTo(obtainReg(), 16);
CONTEXT_LOAD(parts.covPtr.reg, state.buffers.coverage);
ADD(AL, 0, parts.covPtr.reg, parts.covPtr.reg, reg_imm(Rx, LSL, 1));
CONTEXT_ADDR_LOAD(parts.covPtr.reg, state.buffers.coverage);
ADDR_ADD(AL, 0, parts.covPtr.reg, parts.covPtr.reg, reg_imm(Rx, LSL, 1));
}
}
@ -765,8 +765,8 @@ void GGLAssembler::build_depth_test(
int depth = scratches.obtain();
int z = parts.z.reg;
CONTEXT_LOAD(zbase, generated_vars.zbase); // stall
SUB(AL, 0, zbase, zbase, reg_imm(parts.count.reg, LSR, 15));
CONTEXT_ADDR_LOAD(zbase, generated_vars.zbase); // stall
ADDR_SUB(AL, 0, zbase, zbase, reg_imm(parts.count.reg, LSR, 15));
// above does zbase = zbase + ((count >> 16) << 1)
if (mask & Z_TEST) {
@ -990,22 +990,22 @@ void GGLAssembler::base_offset(
{
switch (b.size) {
case 32:
ADD(AL, 0, d.reg, b.reg, reg_imm(o.reg, LSL, 2));
ADDR_ADD(AL, 0, d.reg, b.reg, reg_imm(o.reg, LSL, 2));
break;
case 24:
if (d.reg == b.reg) {
ADD(AL, 0, d.reg, b.reg, reg_imm(o.reg, LSL, 1));
ADD(AL, 0, d.reg, d.reg, o.reg);
ADDR_ADD(AL, 0, d.reg, b.reg, reg_imm(o.reg, LSL, 1));
ADDR_ADD(AL, 0, d.reg, d.reg, o.reg);
} else {
ADD(AL, 0, d.reg, o.reg, reg_imm(o.reg, LSL, 1));
ADD(AL, 0, d.reg, d.reg, b.reg);
ADDR_ADD(AL, 0, d.reg, o.reg, reg_imm(o.reg, LSL, 1));
ADDR_ADD(AL, 0, d.reg, d.reg, b.reg);
}
break;
case 16:
ADD(AL, 0, d.reg, b.reg, reg_imm(o.reg, LSL, 1));
ADDR_ADD(AL, 0, d.reg, b.reg, reg_imm(o.reg, LSL, 1));
break;
case 8:
ADD(AL, 0, d.reg, b.reg, o.reg);
ADDR_ADD(AL, 0, d.reg, b.reg, o.reg);
break;
}
}

View File

@ -31,6 +31,12 @@ namespace android {
// ----------------------------------------------------------------------------
// CONTEXT_ADDR_LOAD(REG, FIELD): expands to an ADDR_LDR call with condition
// AL that targets REG, using mBuilderContext.Rctx as the base register and
// immed12_pre(GGL_OFFSETOF(FIELD)) as the offset operand. Intended for
// context fields that hold an address rather than a plain 32-bit value.
#define CONTEXT_ADDR_LOAD(REG, FIELD) \
ADDR_LDR(AL, REG, mBuilderContext.Rctx, immed12_pre(GGL_OFFSETOF(FIELD)))
// CONTEXT_ADDR_STORE(REG, FIELD): expands to an ADDR_STR call with condition
// AL that takes REG as its register operand, mBuilderContext.Rctx as the
// base register and immed12_pre(GGL_OFFSETOF(FIELD)) as the offset operand.
// Intended for context fields that hold an address rather than a plain
// 32-bit value.
#define CONTEXT_ADDR_STORE(REG, FIELD) \
ADDR_STR(AL, REG, mBuilderContext.Rctx, immed12_pre(GGL_OFFSETOF(FIELD)))
#define CONTEXT_LOAD(REG, FIELD) \
LDR(AL, REG, mBuilderContext.Rctx, immed12_pre(GGL_OFFSETOF(FIELD)))

View File

@ -356,7 +356,7 @@ void GGLAssembler::init_textures(
// merge base & offset
CONTEXT_LOAD(txPtr.reg, generated_vars.texture[i].stride);
SMLABB(AL, Rx, Ry, txPtr.reg, Rx); // x+y*stride
CONTEXT_LOAD(txPtr.reg, generated_vars.texture[i].data);
CONTEXT_ADDR_LOAD(txPtr.reg, generated_vars.texture[i].data);
base_offset(txPtr, txPtr, Rx);
} else {
Scratch scratches(registerFile());
@ -629,7 +629,7 @@ void GGLAssembler::build_textures( fragment_parts_t& parts,
return;
CONTEXT_LOAD(stride, generated_vars.texture[i].stride);
CONTEXT_LOAD(txPtr.reg, generated_vars.texture[i].data);
CONTEXT_ADDR_LOAD(txPtr.reg, generated_vars.texture[i].data);
SMLABB(AL, u, v, stride, u); // u+v*stride
base_offset(txPtr, txPtr, u);