Merge "Adds UXTB16 support to Pixelflinger"

Jean-Baptiste Queru 2010-04-29 09:11:37 -07:00 committed by Android Code Review
commit 86abd5fcab
7 changed files with 128 additions and 1 deletion

View File

@@ -424,5 +424,15 @@ void ARMAssembler::SMLAW(int cc, int y,
*mPC++ = (cc<<28) | 0x1200080 | (Rd<<16) | (Rn<<12) | (Rs<<8) | (y<<4) | Rm;
}
#if 0
#pragma mark -
#pragma mark Byte/half word extract and extend (ARMv6+ only)...
#endif
void ARMAssembler::UXTB16(int cc, int Rd, int Rm, int rotate)
{
*mPC++ = (cc<<28) | 0x6CF0070 | (Rd<<12) | ((rotate >> 3) << 10) | Rm;
}
}; // namespace android
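
For context, the word assembled above is the ARMv6 UXTB16 encoding: it rotates the source register right by 0, 8, 16 or 24 bits, then zero-extends byte 0 into the low halfword and byte 2 into the high halfword of the destination. A minimal C++ model of that behaviour (a reference sketch only, not part of the commit; uxtb16_ref is a hypothetical name):

#include <cstdint>

// Reference model of ARMv6 UXTB16: rotate right by 0/8/16/24,
// then keep bytes 0 and 2 as zero-extended 16-bit lanes.
static uint32_t uxtb16_ref(uint32_t src, int rotate) {
    uint32_t r = (src >> rotate) | (src << ((32 - rotate) & 31));
    return r & 0x00FF00FFu;
}

In the filter32 code later in this change, the rotate-0 form picks out the two even bytes of a 32-bit texel and the rotate-8 form picks out the two odd bytes, so each multiply can weight two colour channels at once.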

View File

@@ -123,6 +123,7 @@ public:
int RdHi, int RdLo, int Rs, int Rm);
virtual void SMLAW(int cc, int y,
int Rd, int Rm, int Rs, int Rn);
virtual void UXTB16(int cc, int Rd, int Rm, int rotate);
private:
ARMAssembler(const ARMAssembler& rhs);

View File

@@ -203,6 +203,9 @@ public:
virtual void SMLAW(int cc, int y,
int Rd, int Rm, int Rs, int Rn) = 0;
// byte/half word extract...
virtual void UXTB16(int cc, int Rd, int Rm, int rotate) = 0;
// -----------------------------------------------------------------------
// convenience...
// -----------------------------------------------------------------------

View File

@@ -195,6 +195,9 @@ void ARMAssemblerProxy::SMLAW(int cc, int y, int Rd, int Rm, int Rs, int Rn) {
mTarget->SMLAW(cc, y, Rd, Rm, Rs, Rn);
}
void ARMAssemblerProxy::UXTB16(int cc, int Rd, int Rm, int rotate) {
mTarget->UXTB16(cc, Rd, Rm, rotate);
}
}; // namespace android

View File

@@ -114,6 +114,8 @@ public:
virtual void SMLAW(int cc, int y,
int Rd, int Rm, int Rs, int Rn);
virtual void UXTB16(int cc, int Rd, int Rm, int rotate);
private:
ARMAssemblerInterface* mTarget;
};

View File

@@ -80,6 +80,7 @@
* f - 1st fp operand (register) (bits 12-14)
* g - 2nd fp operand (register) (bits 16-18)
* h - 3rd fp operand (register/immediate) (bits 0-4)
* j - xtb rotate literal (bits 10-11)
* b - branch address
* t - thumb branch address (bits 24, 0-23)
* k - breakpoint comment (bits 0-3, 8-19)
@@ -122,6 +123,7 @@ static const struct arm32_insn arm32_i[] = {
{ 0x0fe000f0, 0x00c00090, "smull", "Sdnms" },
{ 0x0fe000f0, 0x00a00090, "umlal", "Sdnms" },
{ 0x0fe000f0, 0x00e00090, "smlal", "Sdnms" },
{ 0x0fff03f0, 0x06cf0070, "uxtb16", "dmj" },
{ 0x0d700000, 0x04200000, "strt", "daW" },
{ 0x0d700000, 0x04300000, "ldrt", "daW" },
{ 0x0d700000, 0x04600000, "strbt", "daW" },
@@ -406,6 +408,10 @@ disasm(const disasm_interface_t *di, u_int loc, int altfmt)
else
di->di_printf("f%d", insn & 7);
break;
/* j - xtb rotate literal (bits 10-11) */
case 'j':
di->di_printf("ror #%d", ((insn >> 10) & 3) << 3);
break;
/* b - branch address */
case 'b':
branch = ((insn << 2) & 0x03ffffff);
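
The new 'j' operand prints the rotate literal back out of bits 10-11, matching how ARMAssembler::UXTB16 packed it (rotate >> 3). A small round-trip check in C++, assuming rotate is restricted to 0, 8, 16 or 24 as in the assembler (sketch only, not part of the commit):

#include <cassert>
#include <initializer_list>

int main() {
    for (int rotate : {0, 8, 16, 24}) {
        unsigned field = unsigned(rotate >> 3) << 10;  // assembler side: bits 10-11
        int printed = int((field >> 10) & 3) << 3;     // disassembler side: "ror #printed"
        assert(printed == rotate);
    }
    return 0;
}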

View File

@@ -25,6 +25,7 @@
#include "codeflinger/GGLAssembler.h"
#include <machine/cpu-features.h>
namespace android {
@@ -567,7 +568,7 @@ void GGLAssembler::build_textures( fragment_parts_t& parts,
RSB(GE, 0, height, height, imm(0));
MUL(AL, 0, height, stride, height);
} else {
// u has not been CLAMPed yet
// v has not been CLAMPed yet
CMP(AL, height, reg_imm(v, ASR, FRAC_BITS));
MOV(LE, 0, v, reg_imm(height, LSL, FRAC_BITS));
MOV(LE, 0, height, imm(0));
@@ -868,6 +869,106 @@ void GGLAssembler::filter24(
load(txPtr, texel, 0);
}
#if __ARM_ARCH__ >= 6
// ARMv6 version, using UXTB16, and scheduled for Cortex-A8 pipeline
void GGLAssembler::filter32(
const fragment_parts_t& parts,
pixel_t& texel, const texture_unit_t& tmu,
int U, int V, pointer_t& txPtr,
int FRAC_BITS)
{
const int adjust = FRAC_BITS*2 - 8;
const int round = 0;
const int prescale = 16 - adjust;
Scratch scratches(registerFile());
int pixel= scratches.obtain();
int dh = scratches.obtain();
int u = scratches.obtain();
int k = scratches.obtain();
int temp = scratches.obtain();
int dl = scratches.obtain();
int offsetrt = scratches.obtain();
int offsetlb = scratches.obtain();
int pixellb = offsetlb;
// RB -> U * V
CONTEXT_LOAD(offsetrt, generated_vars.rt);
CONTEXT_LOAD(offsetlb, generated_vars.lb);
if(!round) {
MOV(AL, 0, U, reg_imm(U, LSL, prescale));
}
ADD(AL, 0, u, offsetrt, offsetlb);
LDR(AL, pixel, txPtr.reg, reg_scale_pre(u));
if (round) {
SMULBB(AL, u, U, V);
RSB(AL, 0, U, U, imm(1<<FRAC_BITS));
} else {
SMULWB(AL, u, U, V);
RSB(AL, 0, U, U, imm(1<<(FRAC_BITS+prescale)));
}
UXTB16(AL, temp, pixel, 0);
if (round) {
ADD(AL, 0, u, u, imm(1<<(adjust-1)));
MOV(AL, 0, u, reg_imm(u, LSR, adjust));
}
LDR(AL, pixellb, txPtr.reg, reg_scale_pre(offsetlb));
MUL(AL, 0, dh, temp, u);
UXTB16(AL, temp, pixel, 8);
MUL(AL, 0, dl, temp, u);
RSB(AL, 0, k, u, imm(0x100));
// LB -> (1-U) * V
if (round) {
SMULBB(AL, u, U, V);
} else {
SMULWB(AL, u, U, V);
}
UXTB16(AL, temp, pixellb, 0);
if (round) {
ADD(AL, 0, u, u, imm(1<<(adjust-1)));
MOV(AL, 0, u, reg_imm(u, LSR, adjust));
}
MLA(AL, 0, dh, temp, u, dh);
UXTB16(AL, temp, pixellb, 8);
MLA(AL, 0, dl, temp, u, dl);
SUB(AL, 0, k, k, u);
// LT -> (1-U)*(1-V)
RSB(AL, 0, V, V, imm(1<<FRAC_BITS));
LDR(AL, pixel, txPtr.reg);
if (round) {
SMULBB(AL, u, U, V);
} else {
SMULWB(AL, u, U, V);
}
UXTB16(AL, temp, pixel, 0);
if (round) {
ADD(AL, 0, u, u, imm(1<<(adjust-1)));
MOV(AL, 0, u, reg_imm(u, LSR, adjust));
}
MLA(AL, 0, dh, temp, u, dh);
UXTB16(AL, temp, pixel, 8);
MLA(AL, 0, dl, temp, u, dl);
// RT -> U*(1-V)
LDR(AL, pixel, txPtr.reg, reg_scale_pre(offsetrt));
SUB(AL, 0, u, k, u);
UXTB16(AL, temp, pixel, 0);
MLA(AL, 0, dh, temp, u, dh);
UXTB16(AL, temp, pixel, 8);
MLA(AL, 0, dl, temp, u, dl);
UXTB16(AL, dh, dh, 8);
UXTB16(AL, dl, dl, 8);
ORR(AL, 0, texel.reg, dh, reg_imm(dl, LSL, 8));
}
#else
void GGLAssembler::filter32(
const fragment_parts_t& parts,
pixel_t& texel, const texture_unit_t& tmu,
@@ -955,6 +1056,7 @@ void GGLAssembler::filter32(
AND(AL, 0, dl, dl, reg_imm(mask, LSL, 8));
ORR(AL, 0, texel.reg, dh, dl);
}
#endif
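
As an aside for readers tracing the ARMv6 path: both filter32 variants compute an ordinary bilinear blend of the four neighbouring 32-bit texels; the UXTB16 version just splits each texel into two 16-bit lanes so that one multiply weights two channels at a time. A per-channel C++ sketch of the same arithmetic (a functionally equivalent reference only, assuming u and v are 8-bit fractions in [0, 256]; bilinear32_ref is a hypothetical name, not from this change):

#include <cstdint>

// Weighted blend of the four corner texels lt, rt, lb, rb (32-bit, 4 x 8-bit
// channels). Corner weights are (256-u)*(256-v), u*(256-v), (256-u)*v and u*v;
// they sum to 65536, so the >>16 keeps each channel in [0, 255].
static uint32_t bilinear32_ref(uint32_t lt, uint32_t rt,
                               uint32_t lb, uint32_t rb,
                               int u, int v) {
    uint32_t out = 0;
    for (int shift = 0; shift < 32; shift += 8) {
        uint32_t c = (((lt >> shift) & 0xFF) * (256 - u) * (256 - v) +
                      ((rt >> shift) & 0xFF) * u         * (256 - v) +
                      ((lb >> shift) & 0xFF) * (256 - u) * v         +
                      ((rb >> shift) & 0xFF) * u         * v) >> 16;
        out |= (c & 0xFF) << shift;
    }
    return out;
}

The generated ARMv6 code instead keeps the four weights as 8-bit values that sum to 0x100 (tracked in the k register) and uses the final UXTB16(..., 8) pair to take the high byte of each 16-bit lane before merging dh and dl back into the output pixel.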
void GGLAssembler::build_texture_environment(
component_t& fragment,