am 9d881764: fix part of [2017702] OpenGL bugs with alpha values of 1.0 in the source during blending into 8888 buffers
Merge commit '9d881764173ce16badb6f1098ba5cf44b36f9aec'

* commit '9d881764173ce16badb6f1098ba5cf44b36f9aec':
  fix part of [2017702] OpenGL bugs with alpha values of 1.0 in the source during blending into 8888 buffers
This commit is contained in:
commit
a89d4d0236
|
@@ -21,53 +21,80 @@
|
|||
|
||||
.global scanline_t32cb16blend_arm
|
||||
|
||||
// uses r6, r7, lr
|
||||
|
||||
.macro pixel, DREG, SRC, FB, OFFSET
|
||||
/*
|
||||
* .macro pixel
|
||||
*
|
||||
* \DREG is a 32-bit register containing *two* original destination RGB565
|
||||
* pixels, with the even one in the low-16 bits, and the odd one in the
|
||||
* high 16 bits.
|
||||
*
|
||||
* \SRC is a 32-bit 0xAABBGGRR pixel value, with pre-multiplied colors.
|
||||
*
|
||||
* \FB is a target register that will contain the blended pixel values.
|
||||
*
|
||||
* \ODD is either 0 or 1 and indicates if we're blending the lower or
|
||||
* upper 16-bit pixel of DREG into FB (0 = lower/even, 1 = upper/odd)
|
||||
*
|
||||
*
|
||||
* clobbered: r6, r7, lr
|
||||
*
|
||||
*/
|
||||
|
||||
// SRC = AARRGGBB
|
||||
.macro pixel, DREG, SRC, FB, ODD
|
||||
|
||||
// SRC = 0xAABBGGRR
|
||||
mov r7, \SRC, lsr #24 // sA
|
||||
add r7, r7, r7, lsr #7 // sA + (sA >> 7)
|
||||
rsb r7, r7, #0x100 // sA = 0x100 - (sA+(sA>>7))
|
||||
|
||||
1:
|
||||
|
||||
.if \OFFSET
|
||||
.if \ODD
|
||||
|
||||
// red
|
||||
mov lr, \DREG, lsr #(\OFFSET + 6 + 5)
|
||||
mov lr, \DREG, lsr #(16 + 11)
|
||||
smulbb lr, r7, lr
|
||||
mov r6, \SRC, lsr #3
|
||||
and r6, r6, #0x1F
|
||||
add lr, r6, lr, lsr #8
|
||||
orr \FB, lr, lsl #(\OFFSET + 11)
|
||||
cmp lr, #0x1F
|
||||
orrhs \FB, \FB, #(0x1F<<(16 + 11))
|
||||
orrlo \FB, \FB, lr, lsl #(16 + 11)
|
||||
|
||||
// green
|
||||
and r6, \DREG, #(0x3F<<(\OFFSET + 5))
|
||||
and r6, \DREG, #(0x3F<<(16 + 5))
|
||||
smulbt r6, r7, r6
|
||||
mov lr, \SRC, lsr #(8+2)
|
||||
and lr, lr, #0x3F
|
||||
add r6, lr, r6, lsr #(5+8)
|
||||
orr \FB, \FB, r6, lsl #(\OFFSET + 5)
|
||||
cmp r6, #0x3F
|
||||
orrhs \FB, \FB, #(0x3F<<(16 + 5))
|
||||
orrlo \FB, \FB, r6, lsl #(16 + 5)
|
||||
|
||||
// blue
|
||||
and lr, \DREG, #(0x1F << \OFFSET)
|
||||
and lr, \DREG, #(0x1F << 16)
|
||||
smulbt lr, r7, lr
|
||||
mov r6, \SRC, lsr #(8+8+3)
|
||||
and r6, r6, #0x1F
|
||||
add lr, r6, lr, lsr #8
|
||||
orr \FB, \FB, lr, lsl #\OFFSET
|
||||
cmp lr, #0x1F
|
||||
orrhs \FB, \FB, #(0x1F << 16)
|
||||
orrlo \FB, \FB, lr, lsl #16
|
||||
|
||||
.else
|
||||
|
||||
// red
|
||||
mov lr, \DREG, lsr #(6+5)
|
||||
mov lr, \DREG, lsr #11
|
||||
and lr, lr, #0x1F
|
||||
smulbb lr, r7, lr
|
||||
mov r6, \SRC, lsr #3
|
||||
and r6, r6, #0x1F
|
||||
add lr, r6, lr, lsr #8
|
||||
mov \FB, lr, lsl #11
|
||||
cmp lr, #0x1F
|
||||
movhs \FB, #(0x1F<<11)
|
||||
movlo \FB, lr, lsl #11
|
||||
|
||||
|
||||
// green
|
||||
and r6, \DREG, #(0x3F<<5)
|
||||
|
@@ -75,7 +102,9 @@
|
|||
mov lr, \SRC, lsr #(8+2)
|
||||
and lr, lr, #0x3F
|
||||
add r6, lr, r6, lsr #(5+8)
|
||||
orr \FB, \FB, r6, lsl #5
|
||||
cmp r6, #0x3F
|
||||
orrhs \FB, \FB, #(0x3F<<5)
|
||||
orrlo \FB, \FB, r6, lsl #5
|
||||
|
||||
// blue
|
||||
and lr, \DREG, #0x1F
|
||||
|
@@ -83,7 +112,9 @@
|
|||
mov r6, \SRC, lsr #(8+8+3)
|
||||
and r6, r6, #0x1F
|
||||
add lr, r6, lr, lsr #8
|
||||
orr \FB, \FB, lr
|
||||
cmp lr, #0x1F
|
||||
orrhs \FB, \FB, #0x1F
|
||||
orrlo \FB, \FB, lr
|
||||
|
||||
.endif
|
||||
|
||||
|
@@ -128,7 +159,7 @@ aligned:
|
|||
subs r2, r2, #2
|
||||
blo 9f
|
||||
|
||||
// The main loop is unrolled twice and process 4 pixels
|
||||
// The main loop is unrolled twice and processes 4 pixels
|
||||
8: ldmia r1!, {r4, r5}
|
||||
// stream the source
|
||||
pld [r1, #32]
|
||||
|
@@ -142,7 +173,7 @@ aligned:
|
|||
// stream the destination
|
||||
pld [r0, #32]
|
||||
pixel r3, r4, r12, 0
|
||||
pixel r3, r5, r12, 16
|
||||
pixel r3, r5, r12, 1
|
||||
// effectively, we're getting write-combining by virtue of the
|
||||
// cpu's write-back cache.
|
||||
str r12, [r0, #-4]
|
||||
|
|
Loading…
Reference in New Issue