diff --git a/libpixelflinger/Android.mk b/libpixelflinger/Android.mk
index ed2ab5e72..1947c2d0f 100644
--- a/libpixelflinger/Android.mk
+++ b/libpixelflinger/Android.mk
@@ -43,6 +43,11 @@ ifeq ($(TARGET_ARCH),arm)
 PIXELFLINGER_CFLAGS += -fstrict-aliasing -fomit-frame-pointer
 endif
 
+ifeq ($(TARGET_ARCH),mips)
+PIXELFLINGER_SRC_FILES += arch-mips/t32cb16blend.S
+PIXELFLINGER_CFLAGS += -fstrict-aliasing -fomit-frame-pointer
+endif
+
 LOCAL_SHARED_LIBRARIES := libcutils
 
 ifneq ($(TARGET_ARCH),arm)
diff --git a/libpixelflinger/arch-mips/t32cb16blend.S b/libpixelflinger/arch-mips/t32cb16blend.S
new file mode 100644
index 000000000..c911fbba2
--- /dev/null
+++ b/libpixelflinger/arch-mips/t32cb16blend.S
@@ -0,0 +1,290 @@
+/* libs/pixelflinger/t32cb16blend.S
+**
+** Copyright 2010, The Android Open Source Project
+**
+** Licensed under the Apache License, Version 2.0 (the "License");
+** you may not use this file except in compliance with the License.
+** You may obtain a copy of the License at
+**
+**     http://www.apache.org/licenses/LICENSE-2.0
+**
+** Unless required by applicable law or agreed to in writing, software
+** distributed under the License is distributed on an "AS IS" BASIS,
+** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+** See the License for the specific language governing permissions and
+** limitations under the License.
+*/
+
+#ifdef DEBUG
+#define DBG
+#else
+#define DBG #
+#endif
+
+/*
+ * blend one of 2 16bpp RGB pixels held in dreg selected by shift
+ * with the 32bpp ABGR pixel held in src and store the result in fb
+ *
+ *   dreg   register holding the 16bpp destination pixel(s)
+ *   src    register holding the 32bpp source pixel (alpha in bits 31..24)
+ *   fb     register receiving the blended 16bpp result
+ *   shift  0 selects bits 15..0 of dreg/fb, 16 selects bits 31..16
+ *
+ * Each channel is computed as src + ((dst * f) >> 8) where
+ * f = 0x100 - (sA + (sA >> 7)); the sum is not clamped.
+ *
+ * Assumes that the dreg data is little endian and that
+ * the second pixel (shift==16) will be merged into
+ * the fb result
+ *
+ * Uses $t0,$t6,$t7,$t8
+ */
+
+#if __mips==32 && __mips_isa_rev>=2
+	.macro pixel dreg src fb shift
+	/*
+	 * sA = s >> 24
+	 * f = 0x100 - (sA + (sA>>7))
+	 */
+DBG	.set	noat
+DBG	rdhwr	$at,$2
+DBG	.set	at
+
+	srl	$t7,\src,24
+	srl	$t6,$t7,7
+	addu	$t7,$t6
+	li	$t6,0x100
+	subu	$t7,$t6,$t7
+
+	/* red */
+	ext	$t8,\dreg,\shift+6+5,5			# dst[\shift:15..11]
+	mul	$t6,$t8,$t7
+	ext	$t0,\dreg,\shift+5,6			# start green extraction dst[\shift:10..5]
+	ext	$t8,\src,3,5				# src[7..3]
+	srl	$t6,8
+	addu	$t8,$t6
+	ins	\fb,$t8,\shift+6+5,5			# dst[\shift:15..11]
+
+	/* green */
+	mul	$t8,$t0,$t7
+	ext	$t0,\dreg,\shift,5			# start blue extraction dst[\shift:4..0]
+	ext	$t6,\src,2+8,6				# src[15..10]
+	srl	$t8,8
+	addu	$t8,$t6
+
+	/* blue */
+	mul	$t0,$t0,$t7
+	ins	\fb,$t8,\shift+5,6			# finish green insertion dst[\shift:10..5]
+	ext	$t6,\src,(3+8+8),5
+	srl	$t8,$t0,8
+	addu	$t8,$t6
+	ins	\fb,$t8,\shift,5
+
+DBG	.set	noat
+DBG	rdhwr	$t8,$2
+DBG	subu	$t8,$at
+DBG	sltu	$at,$t8,$v0
+DBG	movn	$v0,$t8,$at
+DBG	sgtu	$at,$t8,$v1
+DBG	movn	$v1,$t8,$at
+DBG	.set	at
+	.endm
+
+#else
+
+	.macro pixel dreg src fb shift
+	/*
+	 * sA = s >> 24
+	 * f = 0x100 - (sA + (sA>>7))
+	 */
+DBG	.set	push
+DBG	.set	noat
+DBG	.set	mips32r2
+DBG	rdhwr	$at,$2
+DBG	.set	pop
+
+	srl	$t7,\src,24
+	srl	$t6,$t7,7
+	addu	$t7,$t6
+	li	$t6,0x100
+	subu	$t7,$t6,$t7
+
+	/*
+	 * red
+	 * dR = (d >> (6 + 5)) & 0x1f;
+	 * dR = (f*dR)>>8
+	 * sR = (s >> (   3)) & 0x1f;
+	 * sR += dR
+	 * fb |= sR << 11
+	 */
+	srl	$t8,\dreg,\shift+6+5
+.if \shift==0
+	and	$t8,0x1f
+.endif
+	mul	$t8,$t8,$t7
+	srl	$t6,\src,3
+	and	$t6,0x1f
+	srl	$t8,8
+	addu	$t8,$t6
+.if \shift!=0
+	sll	$t8,\shift+11
+	or	\fb,$t8
+.else
+	sll	\fb,$t8,11
+.endif
+
+	/*
+	 * green
+	 * dG = (d >> 5) & 0x3f
+	 * dG = (f*dG) >> 8
+	 * sG = (s >> ( 8+2))&0x3F;
+	 */
+	srl	$t8,\dreg,\shift+5
+	and	$t8,0x3f
+	mul	$t8,$t8,$t7
+	srl	$t6,\src,8+2
+	and	$t6,0x3f
+	srl	$t8,8
+	addu	$t8,$t6
+	sll	$t8,\shift + 5
+	or	\fb,$t8
+
+	/* blue */
+.if \shift!=0
+	srl	$t8,\dreg,\shift
+	and	$t8,0x1f
+.else
+	and	$t8,\dreg,0x1f
+.endif
+	mul	$t8,$t8,$t7
+	srl	$t6,\src,(8+8+3)
+	and	$t6,0x1f
+	srl	$t8,8
+	addu	$t8,$t6
+.if \shift!=0
+	sll	$t8,\shift
+.endif
+	or	\fb,$t8
+DBG	.set	push
+DBG	.set	noat
+DBG	.set	mips32r2
+DBG	rdhwr	$t8,$2
+DBG	subu	$t8,$at
+DBG	sltu	$at,$t8,$v0
+DBG	movn	$v0,$t8,$at
+DBG	sgtu	$at,$t8,$v1
+DBG	movn	$v1,$t8,$at
+DBG	.set	pop
+	.endm
+#endif
+
+	.text
+	.align
+
+/*
+ * void scanline_t32cb16blend_mips(uint16_t *dst, uint32_t *src, size_t ct)
+ *
+ * Blends ct 32bpp source pixels into the 16bpp destination scanline.
+ *
+ *   $a0  dst, advanced by 2 bytes per pixel
+ *   $a1  src, advanced by 4 bytes per pixel
+ *   $a2  ct, the number of pixels still to process
+ *
+ * One leading pixel is blended on its own when dst is not word aligned,
+ * then four pixels per iteration using word accesses to dst, and the
+ * last 0..3 pixels one at a time.  A zero source pixel is skipped; in
+ * the unrolled loop a pair is skipped only when both pixels are zero.
+ *
+ * With DEBUG defined, $v0/$v1 hold the smallest/largest rdhwr $2 delta
+ * seen around one pixel blend, multiplied by rdhwr $3 before returning.
+ */
+	.global	scanline_t32cb16blend_mips
+	.ent	scanline_t32cb16blend_mips
+scanline_t32cb16blend_mips:
+DBG	li	$v0,0xffffffff
+DBG	li	$v1,0
+	/* Align the destination if necessary */
+	and	$t0,$a0,3
+	beqz	$t0,aligned
+
+	/* as long as there is at least one pixel */
+	beqz	$a2,done
+
+	lw	$t4,($a1)
+	addu	$a0,2
+	addu	$a1,4
+	beqz	$t4,1f
+	lhu	$t3,-2($a0)
+	pixel	$t3,$t4,$t1,0
+	sh	$t1,-2($a0)
+1:	subu	$a2,1
+
+aligned:
+	/* Check to see if it's worth unrolling the loop */
+	subu	$a2,4
+	bltz	$a2,tail
+
+	/* Process 4 pixels at a time */
+fourpixels:
+	/* 1st pair of pixels */
+	lw	$t4,0($a1)
+	lw	$t5,4($a1)
+	addu	$a0,8
+	addu	$a1,16
+
+	/* both are zero, skip this pair */
+	or	$t3,$t4,$t5
+	beqz	$t3,1f
+
+	/* load the destination */
+	lw	$t3,-8($a0)
+
+	pixel	$t3,$t4,$t1,0
+	pixel	$t3,$t5,$t1,16
+	sw	$t1,-8($a0)
+
+1:
+	/* 2nd pair of pixels */
+	lw	$t4,-8($a1)
+	lw	$t5,-4($a1)
+
+	/* both are zero, skip this pair */
+	or	$t3,$t4,$t5
+	beqz	$t3,1f
+
+	/* load the destination */
+	lw	$t3,-4($a0)
+
+	pixel	$t3,$t4,$t1,0
+	pixel	$t3,$t5,$t1,16
+	sw	$t1,-4($a0)
+
+1:	subu	$a2,4
+	/* $a2 is (pixels left - 4): keep looping while a full group of 4 remains */
+	bgez	$a2,fourpixels
+
+tail:
+	/* the pixel count underran, restore it now */
+	addu	$a2,4
+
+	/* handle the last 0..3 pixels */
+	beqz	$a2,done
+onepixel:
+	lw	$t4,($a1)
+	addu	$a0,2
+	addu	$a1,4
+	beqz	$t4,1f
+	lhu	$t3,-2($a0)
+	pixel	$t3,$t4,$t1,0
+	sh	$t1,-2($a0)
+1:	subu	$a2,1
+	bnez	$a2,onepixel
+done:
+DBG	.set	push
+DBG	.set	mips32r2
+DBG	rdhwr	$a0,$3
+DBG	mul	$v0,$a0
+DBG	mul	$v1,$a0
+DBG	.set	pop
+	j	$ra
+	.end	scanline_t32cb16blend_mips
diff --git a/libpixelflinger/scanline.cpp b/libpixelflinger/scanline.cpp
index 93440f521..d1f3d9677 100644
--- a/libpixelflinger/scanline.cpp
+++ b/libpixelflinger/scanline.cpp
@@ -110,10 +110,14 @@ static void scanline_clear(context_t* c);
 static void rect_generic(context_t* c, size_t yc);
 static void rect_memcpy(context_t* c, size_t yc);
 
+#if defined(__arm__)
 extern "C" void scanline_t32cb16blend_arm(uint16_t*, uint32_t*, size_t);
 extern "C" void scanline_t32cb16_arm(uint16_t *dst, uint32_t *src, size_t ct);
 extern "C" void scanline_col32cb16blend_neon(uint16_t *dst, uint32_t *col, size_t ct);
 extern "C" void scanline_col32cb16blend_arm(uint16_t *dst, uint32_t col, size_t ct);
+#elif defined(__mips__)
+extern "C" void scanline_t32cb16blend_mips(uint16_t*, uint32_t*, size_t);
+#endif
 
 // ----------------------------------------------------------------------------
 
@@ -2136,7 +2140,7 @@ last_one:
 
 void scanline_t32cb16blend(context_t* c)
 {
-#if ((ANDROID_CODEGEN >= ANDROID_CODEGEN_ASM) && defined(__arm__))
+#if ((ANDROID_CODEGEN >= ANDROID_CODEGEN_ASM) && (defined(__arm__) || defined(__mips__)))
     int32_t x = c->iterators.xl;
     size_t ct = c->iterators.xr - x;
     int32_t y = c->iterators.y;
@@ -2148,7 +2152,11 @@ void scanline_t32cb16blend(context_t* c)
     const int32_t v = (c->state.texture[0].shade.it0>>16) + y;
     uint32_t *src = reinterpret_cast<uint32_t*>(tex->data)+(u+(tex->stride*v));
 
+#ifdef __arm__
     scanline_t32cb16blend_arm(dst, src, ct);
+#elif defined(__mips__)
+    scanline_t32cb16blend_mips(dst, src, ct);
+#endif
 #else
     dst_iterator16 di(c);
     horz_iterator32 hi(c);