340 lines
8.6 KiB
C++
340 lines
8.6 KiB
C++
|
/* libs/pixelflinger/fixed.cpp
|
||
|
**
|
||
|
** Copyright 2006, The Android Open Source Project
|
||
|
**
|
||
|
** Licensed under the Apache License, Version 2.0 (the "License");
|
||
|
** you may not use this file except in compliance with the License.
|
||
|
** You may obtain a copy of the License at
|
||
|
**
|
||
|
** http://www.apache.org/licenses/LICENSE-2.0
|
||
|
**
|
||
|
** Unless required by applicable law or agreed to in writing, software
|
||
|
** distributed under the License is distributed on an "AS IS" BASIS,
|
||
|
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||
|
** See the License for the specific language governing permissions and
|
||
|
** limitations under the License.
|
||
|
*/
|
||
|
|
||
|
#include <stdio.h>
|
||
|
|
||
|
#include <private/pixelflinger/ggl_context.h>
|
||
|
#include <private/pixelflinger/ggl_fixed.h>
|
||
|
|
||
|
|
||
|
// ------------------------------------------------------------------------
|
||
|
|
||
|
int32_t gglRecipQNormalized(int32_t x, int* exponent)
|
||
|
{
|
||
|
const int32_t s = x>>31;
|
||
|
uint32_t a = s ? -x : x;
|
||
|
|
||
|
// the result will overflow, so just set it to the biggest/inf value
|
||
|
if (ggl_unlikely(a <= 2LU)) {
|
||
|
*exponent = 0;
|
||
|
return s ? FIXED_MIN : FIXED_MAX;
|
||
|
}
|
||
|
|
||
|
// Newton-Raphson iteration:
|
||
|
// x = r*(2 - a*r)
|
||
|
|
||
|
const int32_t lz = gglClz(a);
|
||
|
a <<= lz; // 0.32
|
||
|
uint32_t r = a;
|
||
|
// note: if a == 0x80000000, this means x was a power-of-2, in this
|
||
|
// case we don't need to compute anything. We get the reciprocal for
|
||
|
// (almost) free.
|
||
|
if (a != 0x80000000) {
|
||
|
r = (0x2E800 << (30-16)) - (r>>(2-1)); // 2.30, r = 2.90625 - 2*a
|
||
|
// 0.32 + 2.30 = 2.62 -> 2.30
|
||
|
// 2.30 + 2.30 = 4.60 -> 2.30
|
||
|
r = (((2LU<<30) - uint32_t((uint64_t(a)*r) >> 32)) * uint64_t(r)) >> 30;
|
||
|
r = (((2LU<<30) - uint32_t((uint64_t(a)*r) >> 32)) * uint64_t(r)) >> 30;
|
||
|
}
|
||
|
|
||
|
// shift right 1-bit to make room for the sign bit
|
||
|
*exponent = 30-lz-1;
|
||
|
r >>= 1;
|
||
|
return s ? -r : r;
|
||
|
}
|
||
|
|
||
|
int32_t gglRecipQ(GGLfixed x, int q)
|
||
|
{
|
||
|
int shift;
|
||
|
x = gglRecipQNormalized(x, &shift);
|
||
|
shift += 16-q;
|
||
|
x += 1L << (shift-1); // rounding
|
||
|
x >>= shift;
|
||
|
return x;
|
||
|
}
|
||
|
|
||
|
// ------------------------------------------------------------------------
|
||
|
|
||
|
GGLfixed gglFastDivx(GGLfixed n, GGLfixed d)
|
||
|
{
|
||
|
if ((d>>24) && ((d>>24)+1)) {
|
||
|
n >>= 8;
|
||
|
d >>= 8;
|
||
|
}
|
||
|
return gglMulx(n, gglRecip(d));
|
||
|
}
|
||
|
|
||
|
// ------------------------------------------------------------------------
|
||
|
|
||
|
static const GGLfixed ggl_sqrt_reciproc_approx_tab[8] = {
|
||
|
// 1/sqrt(x) with x = 1-N/16, N=[8...1]
|
||
|
0x16A09, 0x15555, 0x143D1, 0x134BF, 0x1279A, 0x11C01, 0x111AC, 0x10865
|
||
|
};
|
||
|
|
||
|
GGLfixed gglSqrtRecipx(GGLfixed x)
|
||
|
{
|
||
|
if (x == 0) return FIXED_MAX;
|
||
|
if (x == FIXED_ONE) return x;
|
||
|
const GGLfixed a = x;
|
||
|
const int32_t lz = gglClz(x);
|
||
|
x = ggl_sqrt_reciproc_approx_tab[(a>>(28-lz))&0x7];
|
||
|
const int32_t exp = lz - 16;
|
||
|
if (exp <= 0) x >>= -exp>>1;
|
||
|
else x <<= (exp>>1) + (exp & 1);
|
||
|
if (exp & 1) {
|
||
|
x = gglMulx(x, ggl_sqrt_reciproc_approx_tab[0])>>1;
|
||
|
}
|
||
|
// 2 Newton-Raphson iterations: x = x/2*(3-(a*x)*x)
|
||
|
x = gglMulx((x>>1),(0x30000 - gglMulx(gglMulx(a,x),x)));
|
||
|
x = gglMulx((x>>1),(0x30000 - gglMulx(gglMulx(a,x),x)));
|
||
|
return x;
|
||
|
}
|
||
|
|
||
|
GGLfixed gglSqrtx(GGLfixed a)
|
||
|
{
|
||
|
// Compute a full precision square-root (24 bits accuracy)
|
||
|
GGLfixed r = 0;
|
||
|
GGLfixed bit = 0x800000;
|
||
|
int32_t bshift = 15;
|
||
|
do {
|
||
|
GGLfixed temp = bit + (r<<1);
|
||
|
if (bshift >= 8) temp <<= (bshift-8);
|
||
|
else temp >>= (8-bshift);
|
||
|
if (a >= temp) {
|
||
|
r += bit;
|
||
|
a -= temp;
|
||
|
}
|
||
|
bshift--;
|
||
|
} while (bit>>=1);
|
||
|
return r;
|
||
|
}
|
||
|
|
||
|
// ------------------------------------------------------------------------
|
||
|
|
||
|
static const GGLfixed ggl_log_approx_tab[] = {
|
||
|
// -ln(x)/ln(2) with x = N/16, N=[8...16]
|
||
|
0xFFFF, 0xd47f, 0xad96, 0x8a62, 0x6a3f, 0x4caf, 0x3151, 0x17d6, 0x0000
|
||
|
};
|
||
|
|
||
|
static const GGLfixed ggl_alog_approx_tab[] = { // domain [0 - 1.0]
|
||
|
0xffff, 0xeac0, 0xd744, 0xc567, 0xb504, 0xa5fe, 0x9837, 0x8b95, 0x8000
|
||
|
};
|
||
|
|
||
|
GGLfixed gglPowx(GGLfixed x, GGLfixed y)
|
||
|
{
|
||
|
// prerequisite: 0 <= x <= 1, and y >=0
|
||
|
|
||
|
// pow(x,y) = 2^(y*log2(x))
|
||
|
// = 2^(y*log2(x*(2^exp)*(2^-exp))))
|
||
|
// = 2^(y*(log2(X)-exp))
|
||
|
// = 2^(log2(X)*y - y*exp)
|
||
|
// = 2^( - (-log2(X)*y + y*exp) )
|
||
|
|
||
|
int32_t exp = gglClz(x) - 16;
|
||
|
GGLfixed f = x << exp;
|
||
|
x = (f & 0x0FFF)<<4;
|
||
|
f = (f >> 12) & 0x7;
|
||
|
GGLfixed p = gglMulAddx(
|
||
|
ggl_log_approx_tab[f+1] - ggl_log_approx_tab[f], x,
|
||
|
ggl_log_approx_tab[f]);
|
||
|
p = gglMulAddx(p, y, y*exp);
|
||
|
exp = gglFixedToIntFloor(p);
|
||
|
if (exp < 31) {
|
||
|
p = gglFracx(p);
|
||
|
x = (p & 0x1FFF)<<3;
|
||
|
p >>= 13;
|
||
|
p = gglMulAddx(
|
||
|
ggl_alog_approx_tab[p+1] - ggl_alog_approx_tab[p], x,
|
||
|
ggl_alog_approx_tab[p]);
|
||
|
p >>= exp;
|
||
|
} else {
|
||
|
p = 0;
|
||
|
}
|
||
|
return p;
|
||
|
// ( powf((a*65536.0f), (b*65536.0f)) ) * 65536.0f;
|
||
|
}
|
||
|
|
||
|
// ------------------------------------------------------------------------
|
||
|
|
||
|
int32_t gglDivQ(GGLfixed n, GGLfixed d, int32_t i)
|
||
|
{
|
||
|
//int32_t r =int32_t((int64_t(n)<<i)/d);
|
||
|
const int32_t ds = n^d;
|
||
|
if (n<0) n = -n;
|
||
|
if (d<0) d = -d;
|
||
|
int nd = gglClz(d) - gglClz(n);
|
||
|
i += nd + 1;
|
||
|
if (nd > 0) d <<= nd;
|
||
|
else n <<= -nd;
|
||
|
uint32_t q = 0;
|
||
|
|
||
|
int j = i & 7;
|
||
|
i >>= 3;
|
||
|
|
||
|
// gcc deals with the code below pretty well.
|
||
|
// we get 3.75 cycles per bit in the main loop
|
||
|
// and 8 cycles per bit in the termination loop
|
||
|
if (ggl_likely(i)) {
|
||
|
n -= d;
|
||
|
do {
|
||
|
q <<= 8;
|
||
|
if (n>=0) q |= 128;
|
||
|
else n += d;
|
||
|
n = n*2 - d;
|
||
|
if (n>=0) q |= 64;
|
||
|
else n += d;
|
||
|
n = n*2 - d;
|
||
|
if (n>=0) q |= 32;
|
||
|
else n += d;
|
||
|
n = n*2 - d;
|
||
|
if (n>=0) q |= 16;
|
||
|
else n += d;
|
||
|
n = n*2 - d;
|
||
|
if (n>=0) q |= 8;
|
||
|
else n += d;
|
||
|
n = n*2 - d;
|
||
|
if (n>=0) q |= 4;
|
||
|
else n += d;
|
||
|
n = n*2 - d;
|
||
|
if (n>=0) q |= 2;
|
||
|
else n += d;
|
||
|
n = n*2 - d;
|
||
|
if (n>=0) q |= 1;
|
||
|
else n += d;
|
||
|
|
||
|
if (--i == 0)
|
||
|
goto finish;
|
||
|
|
||
|
n = n*2 - d;
|
||
|
} while(true);
|
||
|
do {
|
||
|
q <<= 1;
|
||
|
n = n*2 - d;
|
||
|
if (n>=0) q |= 1;
|
||
|
else n += d;
|
||
|
finish: ;
|
||
|
} while (j--);
|
||
|
return (ds<0) ? -q : q;
|
||
|
}
|
||
|
|
||
|
n -= d;
|
||
|
if (n>=0) q |= 1;
|
||
|
else n += d;
|
||
|
j--;
|
||
|
goto finish;
|
||
|
}
|
||
|
|
||
|
// ------------------------------------------------------------------------
|
||
|
|
||
|
// assumes that the int32_t values of a, b, and c are all positive
|
||
|
// use when both a and b are larger than c
|
||
|
|
||
|
template <typename T>
|
||
|
static inline void swap(T& a, T& b) {
|
||
|
T t(a);
|
||
|
a = b;
|
||
|
b = t;
|
||
|
}
|
||
|
|
||
|
static __attribute__((noinline))
|
||
|
int32_t slow_muldiv(uint32_t a, uint32_t b, uint32_t c)
|
||
|
{
|
||
|
// first we compute a*b as a 64-bit integer
|
||
|
// (GCC generates umull with the code below)
|
||
|
uint64_t ab = uint64_t(a)*b;
|
||
|
uint32_t hi = ab>>32;
|
||
|
uint32_t lo = ab;
|
||
|
uint32_t result;
|
||
|
|
||
|
// now perform the division
|
||
|
if (hi >= c) {
|
||
|
overflow:
|
||
|
result = 0x7fffffff; // basic overflow
|
||
|
} else if (hi == 0) {
|
||
|
result = lo/c; // note: c can't be 0
|
||
|
if ((result >> 31) != 0) // result must fit in 31 bits
|
||
|
goto overflow;
|
||
|
} else {
|
||
|
uint32_t r = hi;
|
||
|
int bits = 31;
|
||
|
result = 0;
|
||
|
do {
|
||
|
r = (r << 1) | (lo >> 31);
|
||
|
lo <<= 1;
|
||
|
result <<= 1;
|
||
|
if (r >= c) {
|
||
|
r -= c;
|
||
|
result |= 1;
|
||
|
}
|
||
|
} while (bits--);
|
||
|
}
|
||
|
return int32_t(result);
|
||
|
}
|
||
|
|
||
|
// assumes a >= 0 and c >= b >= 0
|
||
|
static inline
|
||
|
int32_t quick_muldiv(int32_t a, int32_t b, int32_t c)
|
||
|
{
|
||
|
int32_t r = 0, q = 0, i;
|
||
|
int leading = gglClz(a);
|
||
|
i = 32 - leading;
|
||
|
a <<= leading;
|
||
|
do {
|
||
|
r <<= 1;
|
||
|
if (a < 0)
|
||
|
r += b;
|
||
|
a <<= 1;
|
||
|
q <<= 1;
|
||
|
if (r >= c) {
|
||
|
r -= c;
|
||
|
q++;
|
||
|
}
|
||
|
asm(""::); // gcc generates better code this way
|
||
|
if (r >= c) {
|
||
|
r -= c;
|
||
|
q++;
|
||
|
}
|
||
|
}
|
||
|
while (--i);
|
||
|
return q;
|
||
|
}
|
||
|
|
||
|
// this function computes a*b/c with 64-bit intermediate accuracy
|
||
|
// overflows (e.g. division by 0) are handled and return INT_MAX
|
||
|
|
||
|
int32_t gglMulDivi(int32_t a, int32_t b, int32_t c)
|
||
|
{
|
||
|
int32_t result;
|
||
|
int32_t sign = a^b^c;
|
||
|
|
||
|
if (a < 0) a = -a;
|
||
|
if (b < 0) b = -b;
|
||
|
if (c < 0) c = -c;
|
||
|
|
||
|
if (a < b) {
|
||
|
swap(a, b);
|
||
|
}
|
||
|
|
||
|
if (b <= c) result = quick_muldiv(a, b, c);
|
||
|
else result = slow_muldiv((uint32_t)a, (uint32_t)b, (uint32_t)c);
|
||
|
|
||
|
if (sign < 0)
|
||
|
result = -result;
|
||
|
|
||
|
return result;
|
||
|
}
|