Merge "Adds UXTB16 support to Pixelflinger"
This commit is contained in:
commit
86abd5fcab
|
@ -424,5 +424,15 @@ void ARMAssembler::SMLAW(int cc, int y,
|
|||
*mPC++ = (cc<<28) | 0x1200080 | (Rd<<16) | (Rn<<12) | (Rs<<8) | (y<<4) | Rm;
|
||||
}
|
||||
|
||||
#if 0
|
||||
#pragma mark -
|
||||
#pragma mark Byte/half word extract and extend (ARMv6+ only)...
|
||||
#endif
|
||||
|
||||
void ARMAssembler::UXTB16(int cc, int Rd, int Rm, int rotate)
|
||||
{
|
||||
*mPC++ = (cc<<28) | 0x6CF0070 | (Rd<<12) | ((rotate >> 3) << 10) | Rm;
|
||||
}
|
||||
|
||||
}; // namespace android
|
||||
|
||||
|
|
|
@ -123,6 +123,7 @@ public:
|
|||
int RdHi, int RdLo, int Rs, int Rm);
|
||||
virtual void SMLAW(int cc, int y,
|
||||
int Rd, int Rm, int Rs, int Rn);
|
||||
// Emits a UXTB16 instruction (ARMv6+ only); rotate is given in bits and is
// divided by 8 to form the 2-bit rotation field of the encoding.
virtual void UXTB16(int cc, int Rd, int Rm, int rotate);
|
||||
|
||||
private:
|
||||
ARMAssembler(const ARMAssembler& rhs);
|
||||
|
|
|
@ -203,6 +203,9 @@ public:
|
|||
virtual void SMLAW(int cc, int y,
|
||||
int Rd, int Rm, int Rs, int Rn) = 0;
|
||||
|
||||
// byte/half word extract...
|
||||
// UXTB16: cc is the condition code, Rd the destination register, Rm the
// source register and rotate the rotation applied to Rm, in bits.
virtual void UXTB16(int cc, int Rd, int Rm, int rotate) = 0;
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
// convenience...
|
||||
// -----------------------------------------------------------------------
|
||||
|
|
|
@ -195,6 +195,9 @@ void ARMAssemblerProxy::SMLAW(int cc, int y, int Rd, int Rm, int Rs, int Rn) {
|
|||
mTarget->SMLAW(cc, y, Rd, Rm, Rs, Rn);
|
||||
}
|
||||
|
||||
// Forwards the UXTB16 request, with all arguments unchanged, to the wrapped
// assembler held in mTarget.
void ARMAssemblerProxy::UXTB16(int cc, int Rd, int Rm, int rotate) {
|
||||
mTarget->UXTB16(cc, Rd, Rm, rotate);
|
||||
}
|
||||
|
||||
}; // namespace android
|
||||
|
||||
|
|
|
@ -114,6 +114,8 @@ public:
|
|||
virtual void SMLAW(int cc, int y,
|
||||
int Rd, int Rm, int Rs, int Rn);
|
||||
|
||||
// Proxy entry point for UXTB16; the call is passed through to mTarget.
virtual void UXTB16(int cc, int Rd, int Rm, int rotate);
|
||||
|
||||
private:
|
||||
ARMAssemblerInterface* mTarget;
|
||||
};
|
||||
|
|
|
@ -80,6 +80,7 @@
|
|||
* f - 1st fp operand (register) (bits 12-14)
|
||||
* g - 2nd fp operand (register) (bits 16-18)
|
||||
* h - 3rd fp operand (register/immediate) (bits 0-4)
|
||||
* j - xtb rotate literal (bits 10-11)
|
||||
* b - branch address
|
||||
* t - thumb branch address (bits 24, 0-23)
|
||||
* k - breakpoint comment (bits 0-3, 8-19)
|
||||
|
@ -122,6 +123,7 @@ static const struct arm32_insn arm32_i[] = {
|
|||
{ 0x0fe000f0, 0x00c00090, "smull", "Sdnms" },
|
||||
{ 0x0fe000f0, 0x00a00090, "umlal", "Sdnms" },
|
||||
{ 0x0fe000f0, 0x00e00090, "smlal", "Sdnms" },
|
||||
{ 0x0fff03f0, 0x06cf0070, "uxtb16", "dmj" },
|
||||
{ 0x0d700000, 0x04200000, "strt", "daW" },
|
||||
{ 0x0d700000, 0x04300000, "ldrt", "daW" },
|
||||
{ 0x0d700000, 0x04600000, "strbt", "daW" },
|
||||
|
@ -406,6 +408,10 @@ disasm(const disasm_interface_t *di, u_int loc, int altfmt)
|
|||
else
|
||||
di->di_printf("f%d", insn & 7);
|
||||
break;
|
||||
/* j - xtb rotate literal (bits 10-11) */
|
||||
case 'j':
|
||||
di->di_printf("ror #%d", ((insn >> 10) & 3) << 3);
|
||||
break;
|
||||
/* b - branch address */
|
||||
case 'b':
|
||||
branch = ((insn << 2) & 0x03ffffff);
|
||||
|
|
|
@ -25,6 +25,7 @@
|
|||
|
||||
#include "codeflinger/GGLAssembler.h"
|
||||
|
||||
#include <machine/cpu-features.h>
|
||||
|
||||
namespace android {
|
||||
|
||||
|
@ -567,7 +568,7 @@ void GGLAssembler::build_textures( fragment_parts_t& parts,
|
|||
RSB(GE, 0, height, height, imm(0));
|
||||
MUL(AL, 0, height, stride, height);
|
||||
} else {
|
||||
// u has not been CLAMPed yet
|
||||
// v has not been CLAMPed yet
|
||||
CMP(AL, height, reg_imm(v, ASR, FRAC_BITS));
|
||||
MOV(LE, 0, v, reg_imm(height, LSL, FRAC_BITS));
|
||||
MOV(LE, 0, height, imm(0));
|
||||
|
@ -868,6 +869,106 @@ void GGLAssembler::filter24(
|
|||
load(txPtr, texel, 0);
|
||||
}
|
||||
|
||||
#if __ARM_ARCH__ >= 6
|
||||
// ARMv6 version, using UXTB16, and scheduled for Cortex-A8 pipeline
|
||||
// Emits the instruction sequence that blends four texels loaded relative to
// txPtr.reg (labelled RB, LB, LT and RT in the section comments below) and
// leaves the combined result in texel.reg.
//
//   parts, tmu - not referenced in this variant
//   texel      - texel.reg receives the final packed value
//   U, V       - registers holding the two blend factors; both are modified
//                by the emitted code
//   txPtr      - txPtr.reg is the base address used for every load
//   FRAC_BITS  - number of fraction bits in U and V (used to form 1.0 as
//                1<<FRAC_BITS)
//
// Each texel is split with UXTB16 into two registers of two 16-bit lanes
// (rotate 0 -> bytes 0 and 2, rotate 8 -> bytes 1 and 3), so one MUL/MLA
// scales two channels at a time. dh accumulates the rotate-0 lanes and dl the
// rotate-8 lanes.
void GGLAssembler::filter32(
|
||||
const fragment_parts_t& parts,
|
||||
pixel_t& texel, const texture_unit_t& tmu,
|
||||
int U, int V, pointer_t& txPtr,
|
||||
int FRAC_BITS)
|
||||
{
|
||||
const int adjust = FRAC_BITS*2 - 8;
|
||||
// round is fixed at 0: every "if (round)" branch below emits nothing and
// only the SMULWB path is generated.
const int round = 0;
|
||||
const int prescale = 16 - adjust;
|
||||
|
||||
Scratch scratches(registerFile());
|
||||
|
||||
int pixel= scratches.obtain();
|
||||
int dh = scratches.obtain();
|
||||
int u = scratches.obtain();
|
||||
int k = scratches.obtain();
|
||||
|
||||
int temp = scratches.obtain();
|
||||
int dl = scratches.obtain();
|
||||
|
||||
int offsetrt = scratches.obtain();
|
||||
int offsetlb = scratches.obtain();
|
||||
|
||||
// pixellb shares a register with offsetlb: the load into pixellb overwrites
// the lb offset, which is not read again after that load.
int pixellb = offsetlb;
|
||||
|
||||
// RB -> U * V
|
||||
CONTEXT_LOAD(offsetrt, generated_vars.rt);
|
||||
CONTEXT_LOAD(offsetlb, generated_vars.lb);
|
||||
if(!round) {
|
||||
// Pre-shift U left by prescale before it is used as the SMULWB operand.
MOV(AL, 0, U, reg_imm(U, LSL, prescale));
|
||||
}
|
||||
// The RB texel is addressed with the sum of the rt and lb offsets.
ADD(AL, 0, u, offsetrt, offsetlb);
|
||||
|
||||
LDR(AL, pixel, txPtr.reg, reg_scale_pre(u));
|
||||
if (round) {
|
||||
SMULBB(AL, u, U, V);
|
||||
RSB(AL, 0, U, U, imm(1<<FRAC_BITS));
|
||||
} else {
|
||||
SMULWB(AL, u, U, V);
|
||||
// U becomes (1.0 - U), with 1.0 expressed at the pre-shifted scale.
RSB(AL, 0, U, U, imm(1<<(FRAC_BITS+prescale)));
|
||||
}
|
||||
UXTB16(AL, temp, pixel, 0);
|
||||
if (round) {
|
||||
ADD(AL, 0, u, u, imm(1<<(adjust-1)));
|
||||
MOV(AL, 0, u, reg_imm(u, LSR, adjust));
|
||||
}
|
||||
// Start the LB load here, ahead of the point where its value is consumed.
LDR(AL, pixellb, txPtr.reg, reg_scale_pre(offsetlb));
|
||||
MUL(AL, 0, dh, temp, u);
|
||||
UXTB16(AL, temp, pixel, 8);
|
||||
MUL(AL, 0, dl, temp, u);
|
||||
// k tracks the weight still unassigned out of a total of 0x100.
RSB(AL, 0, k, u, imm(0x100));
|
||||
|
||||
// LB -> (1-U) * V
|
||||
if (round) {
|
||||
SMULBB(AL, u, U, V);
|
||||
} else {
|
||||
SMULWB(AL, u, U, V);
|
||||
}
|
||||
UXTB16(AL, temp, pixellb, 0);
|
||||
if (round) {
|
||||
ADD(AL, 0, u, u, imm(1<<(adjust-1)));
|
||||
MOV(AL, 0, u, reg_imm(u, LSR, adjust));
|
||||
}
|
||||
MLA(AL, 0, dh, temp, u, dh);
|
||||
UXTB16(AL, temp, pixellb, 8);
|
||||
MLA(AL, 0, dl, temp, u, dl);
|
||||
SUB(AL, 0, k, k, u);
|
||||
|
||||
// LT -> (1-U)*(1-V)
|
||||
// V becomes (1.0 - V); the LT texel is loaded from txPtr.reg with no offset.
RSB(AL, 0, V, V, imm(1<<FRAC_BITS));
|
||||
LDR(AL, pixel, txPtr.reg);
|
||||
if (round) {
|
||||
SMULBB(AL, u, U, V);
|
||||
} else {
|
||||
SMULWB(AL, u, U, V);
|
||||
}
|
||||
UXTB16(AL, temp, pixel, 0);
|
||||
if (round) {
|
||||
ADD(AL, 0, u, u, imm(1<<(adjust-1)));
|
||||
MOV(AL, 0, u, reg_imm(u, LSR, adjust));
|
||||
}
|
||||
MLA(AL, 0, dh, temp, u, dh);
|
||||
UXTB16(AL, temp, pixel, 8);
|
||||
MLA(AL, 0, dl, temp, u, dl);
|
||||
|
||||
// RT -> U*(1-V)
|
||||
LDR(AL, pixel, txPtr.reg, reg_scale_pre(offsetrt));
|
||||
// The RT weight is not multiplied out: it is whatever remains of 0x100 after
// the other three weights, so the four weights always sum to 0x100.
SUB(AL, 0, u, k, u);
|
||||
UXTB16(AL, temp, pixel, 0);
|
||||
MLA(AL, 0, dh, temp, u, dh);
|
||||
UXTB16(AL, temp, pixel, 8);
|
||||
MLA(AL, 0, dl, temp, u, dl);
|
||||
|
||||
// Keep the high byte of each 16-bit accumulator lane (rotate 8), then merge:
// dh supplies bytes 0 and 2, dl shifted left by 8 supplies bytes 1 and 3.
UXTB16(AL, dh, dh, 8);
|
||||
UXTB16(AL, dl, dl, 8);
|
||||
ORR(AL, 0, texel.reg, dh, reg_imm(dl, LSL, 8));
|
||||
}
|
||||
#else
|
||||
void GGLAssembler::filter32(
|
||||
const fragment_parts_t& parts,
|
||||
pixel_t& texel, const texture_unit_t& tmu,
|
||||
|
@ -955,6 +1056,7 @@ void GGLAssembler::filter32(
|
|||
AND(AL, 0, dl, dl, reg_imm(mask, LSL, 8));
|
||||
ORR(AL, 0, texel.reg, dh, dl);
|
||||
}
|
||||
#endif
|
||||
|
||||
void GGLAssembler::build_texture_environment(
|
||||
component_t& fragment,
|
||||
|
|
Loading…
Reference in New Issue