/*
Copyright (c) 2014, Intel Corporation
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice,
    * this list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above copyright notice,
    * this list of conditions and the following disclaimer in the documentation
    * and/or other materials provided with the distribution.

    * Neither the name of Intel Corporation nor the names of its contributors
    * may be used to endorse or promote products derived from this software
    * without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

/******************************************************************************/
//                     ALGORITHM DESCRIPTION
//                     ---------------------
//
//     1. RANGE REDUCTION
//
//     We perform an initial range reduction from X to r with
//
//          X =~= N * pi/32 + r
//
//     so that |r| <= pi/64 + epsilon. We restrict inputs to those
//     where |N| <= 932560. Beyond this, the range reduction is
//     insufficiently accurate. For extremely small inputs,
//     denormalization can occur internally, impacting performance.
//     This means that the main path is actually only taken for
//     2^-252 <= |X| < 90112.
//
//     To avoid branches, we perform the range reduction to full
//     accuracy each time.
//
//          X - N * (P_1 + P_2 + P_3)
//
//     where P_1 and P_2 are 32-bit numbers (so multiplication by N
//     is exact) and P_3 is a 53-bit number. Together, these
//     approximate pi well enough for all cases in the restricted
//     range.
//
//     The main reduction sequence is:
//
//             y = 32/pi * x
//             N = integer(y)
//     (computed by adding and subtracting off SHIFTER)
//
//             m_1 = N * P_1
//             m_2 = N * P_2
//             r_1 = x - m_1
//             r = r_1 - m_2
//     (this r can be used for most of the calculation)
//
//             c_1 = r_1 - r
//             m_3 = N * P_3
//             c_2 = c_1 - m_2
//             c = c_2 - m_3
//
//     2. MAIN ALGORITHM
//
//     The algorithm uses a table lookup based on B = M * pi / 32
//     where M = N mod 64. The stored values are:
//       sigma             closest power of 2 to cos(B)
//       C_hl              53-bit cos(B) - sigma
//       S_hi + S_lo       2 * 53-bit sin(B)
//
//     The computation is organized as follows:
//
//          sin(B + r + c) = [sin(B) + sigma * r] +
//                           r * (cos(B) - sigma) +
//                           sin(B) * [cos(r + c) - 1] +
//                           cos(B) * [sin(r + c) - r]
//
//     which is approximately:
//
//          [S_hi + sigma * r] +
//          C_hl * r +
//          S_lo + S_hi * [(cos(r) - 1) - r * c] +
//          (C_hl + sigma) * [(sin(r) - r) + c]
//
//     and this is what is actually computed. We separate this sum
//     into four parts:
//
//          hi + med + pols + corr
//
//     where
//
//          hi       = S_hi + sigma r
//          med      = C_hl * r
//          pols     = S_hi * (cos(r) - 1) + (C_hl + sigma) * (sin(r) - r)
//          corr     = S_lo + c * ((C_hl + sigma) - S_hi * r)
//
//     3. POLYNOMIAL
//
//     The polynomial S_hi * (cos(r) - 1) + (C_hl + sigma) *
//     (sin(r) - r) can be rearranged freely, since it is quite
//     small, so we exploit parallelism to the fullest.
//
//          psc4       =   SC_4 * r_1
//          msc4       =   psc4 * r
//          r2         =   r * r
//          msc2       =   SC_2 * r2
//          r4         =   r2 * r2
//          psc3       =   SC_3 + msc4
//          psc1       =   SC_1 + msc2
//          msc3       =   r4 * psc3
//          sincospols =   psc1 + msc3
//          pols       =   sincospols *
//                         ( S_hi * r^2 | (C_hl + sigma) * r^3 )
//     (a packed pair, written high lane | low lane)
//
//     4. CORRECTION TERM
//
//     This is where the "c" component of the range reduction is
//     taken into account; recall that just "r" is used for most of
//     the calculation.
//
//          -c   = m_3 - c_2
//          -d   = S_hi * r - (C_hl + sigma)
//          corr = -c * -d + S_lo
//
//     5. COMPENSATED SUMMATIONS
//
//     The two successive compensated summations add up the high
//     and medium parts, leaving just the low parts to add up at
//     the end.
//
//          rs        =  sigma * r
//          res_int   =  S_hi + rs
//          k_0       =  S_hi - res_int
//          k_2       =  k_0 + rs
//          med       =  C_hl * r
//          res_hi    =  res_int + med
//          k_1       =  res_int - res_hi
//          k_3       =  k_1 + med
//
//     6. FINAL SUMMATION
//
//     We now add up all the small parts:
//
//          res_lo = pols(hi) + pols(lo) + corr + k_2 + k_3
//
//     Now the overall result is just:
//
//          res_hi + res_lo
//
//     7. SMALL ARGUMENTS
//
//     Inputs with |X| < 2^-252 are treated specially as
//     1 - |x|.
//
// Special cases:
//  cos(NaN) = quiet NaN, and raise invalid exception
//  cos(INF) = NaN and raise invalid exception
//  cos(0) = 1
//
/******************************************************************************/

// Provides ENTRY/END/ALIAS_SYMBOL and __bionic_asm_align used below.
#include <private/bionic_asm.h>

//------------------------------------------------------------------------------
// static_func: position-independent address of static_const_table.
//   In:    nothing
//   Out:   %eax = address of static_const_table
//   Clobb: %eax only
// Uses the call/pop sequence to obtain the current PC, converts it to the GOT
// base, then adds the table's GOT-relative offset.
//------------------------------------------------------------------------------
# -- Begin  static_func
        .text
        .align __bionic_asm_align
        .type static_func, @function
static_func:
..B1.1:
        call      ..L2
..L2:
        popl      %eax                          // eax = address of ..L2
        lea       _GLOBAL_OFFSET_TABLE_+[. - ..L2](%eax), %eax   // eax = GOT base
        lea       static_const_table@GOTOFF(%eax), %eax          // eax = &static_const_table
        ret
        .size   static_func,.-static_func
# -- End  static_func

//------------------------------------------------------------------------------
// cos: double-precision cosine, argument on the stack, result in x87 st(0).
//
// Frame: %ebp-based, 120 bytes of locals; the caller's %ebx is kept at
// 56(%esp) and restored on exit. Because %esp = %ebp - 120, the argument at
// 8(%ebp) is addressed as 128(%esp) (high dword at 132(%esp)).
//
// Register roles on the main path:
//   %ebx = static_const_table base
//   %eax = address of the 32-byte table entry selected by M
//   %edx = N, then the entry index M, then the byte offset M * 32
//   %xmm0..%xmm7 = scratch, following the ALGORITHM DESCRIPTION above
//
// Paths, chosen from the top 16 bits of |x|:
//   main   2^-252 <= |x| < 90112   table + polynomial evaluation
//   small  |x| < 2^-252            returns 1 - |x|
//   huge   |x| >= 90112, finite    delegates to __libm_sincos_huge
//   inf/NaN                        returns x * (-0.0)
//------------------------------------------------------------------------------
# -- Begin  cos
ENTRY(cos)
# parameter 1: 8 + %ebp
..B2.1:
..B2.2:
        pushl     %ebp
        movl      %esp, %ebp
        subl      $120, %esp
        movl      %ebx, 56(%esp)                // save caller's %ebx
        call      static_func
        movl      %eax, %ebx                    // ebx = constant table base
        movsd     128(%esp), %xmm0              // xmm0 = x
        pextrw    $3, %xmm0, %eax               // top 16 bits: sign, exponent, 4 mantissa bits
        andl      $32767, %eax                  // drop the sign bit
        subl      $12336, %eax                  // 12336 = 0x3030, the top-16 pattern of 2^-252
        cmpl      $4293, %eax                   // 12336 + 4293 = 0x40F5; 0x40F6 is 90112
        ja        .L_2TAG_PACKET_0.0.2          // unsigned: outside the main-path range

        // ---- Main path: range reduction ------------------------------------
        movsd     2160(%ebx), %xmm1             // 32/pi
        mulsd     %xmm0, %xmm1                  // y = x * 32/pi
        movapd    2240(%ebx), %xmm5             // {0.5, 0.5}
        movsd     2224(%ebx), %xmm4             // sign-bit mask
        andpd     %xmm0, %xmm4                  // sign bit of x
        orps      %xmm4, %xmm5                  // low lane = 0.5 carrying the sign of x
        movsd     2128(%ebx), %xmm3             // P_1
        movapd    2112(%ebx), %xmm2             // {P_2, P_2}
        addpd     %xmm5, %xmm1                  // y +/- 0.5, so truncation rounds to nearest
        cvttsd2si %xmm1, %edx                   // N
        cvtsi2sdl %edx, %xmm1                   // (double) N
        mulsd     %xmm1, %xmm3                  // m_1 = N * P_1
        unpcklpd  %xmm1, %xmm1                  // {N, N}
        addl      $1865232, %edx                // bias is 16 (mod 64): shifts the index by pi/2,
                                                // so the sin(B + r + c) scheme yields cos(x)
        movapd    %xmm0, %xmm4                  // second copy of x
        andl      $63, %edx                     // M = table index
        movapd    2096(%ebx), %xmm5             // SC_4 pair
        lea       (%ebx), %eax
        shll      $5, %edx                      // 32 bytes per table entry
        addl      %edx, %eax                    // eax = &table[M]
        mulpd     %xmm1, %xmm2                  // m_2 = N * P_2 (both lanes)
        subsd     %xmm3, %xmm0                  // r_1 = x - m_1
        mulsd     2144(%ebx), %xmm1             // m_3 = N * P_3
        subsd     %xmm3, %xmm4                  // r_1 (second copy)
        movsd     8(%eax), %xmm7                // S_hi
        unpcklpd  %xmm0, %xmm0                  // {r_1, r_1}
        movapd    %xmm4, %xmm3                  // r_1
        subsd     %xmm2, %xmm4                  // r = r_1 - m_2
        mulpd     %xmm0, %xmm5                  // psc4 = SC_4 * r_1
        subpd     %xmm2, %xmm0                  // {r, r}
        movapd    2064(%ebx), %xmm6             // SC_2 pair
        mulsd     %xmm4, %xmm7                  // S_hi * r
        subsd     %xmm4, %xmm3                  // c_1 = r_1 - r
        mulpd     %xmm0, %xmm5                  // msc4 = psc4 * r
        mulpd     %xmm0, %xmm0                  // r2 = r * r
        subsd     %xmm2, %xmm3                  // c_2 = c_1 - m_2

        // ---- Polynomial, correction term and compensated sums (interleaved) --
        movapd    (%eax), %xmm2                 // {C_hl (low), S_hi (high)}
        subsd     %xmm3, %xmm1                  // -c = m_3 - c_2
        movsd     24(%eax), %xmm3               // sigma
        addsd     %xmm3, %xmm2                  // low lane = C_hl + sigma
        subsd     %xmm2, %xmm7                  // -d = S_hi * r - (C_hl + sigma)
        mulsd     %xmm4, %xmm2                  // low lane = (C_hl + sigma) * r
        mulpd     %xmm0, %xmm6                  // msc2 = SC_2 * r2
        mulsd     %xmm4, %xmm3                  // rs = sigma * r
        mulpd     %xmm0, %xmm2                  // {(C_hl + sigma) * r^3, S_hi * r^2}
        mulpd     %xmm0, %xmm0                  // r4 = r2 * r2
        addpd     2080(%ebx), %xmm5             // psc3 = SC_3 + msc4
        mulsd     (%eax), %xmm4                 // med = C_hl * r
        addpd     2048(%ebx), %xmm6             // psc1 = SC_1 + msc2
        mulpd     %xmm0, %xmm5                  // msc3 = r4 * psc3
        movapd    %xmm3, %xmm0                  // keep rs
        addsd     8(%eax), %xmm3                // res_int = S_hi + rs
        mulpd     %xmm7, %xmm1                  // (-c) * (-d); only the low lane is used
        movapd    %xmm4, %xmm7                  // keep med
        addsd     %xmm3, %xmm4                  // res_hi = res_int + med
        addpd     %xmm5, %xmm6                  // sincospols = psc1 + msc3
        movsd     8(%eax), %xmm5                // S_hi
        subsd     %xmm3, %xmm5                  // k_0 = S_hi - res_int
        subsd     %xmm4, %xmm3                  // k_1 = res_int - res_hi
        addsd     16(%eax), %xmm1               // corr = (-c) * (-d) + S_lo
        mulpd     %xmm2, %xmm6                  // pols (two lanes)
        addsd     %xmm0, %xmm5                  // k_2 = k_0 + rs
        addsd     %xmm7, %xmm3                  // k_3 = k_1 + med

        // ---- Final summation -------------------------------------------------
        addsd     %xmm5, %xmm1                  // res_lo = corr + k_2
        addsd     %xmm3, %xmm1                  //        + k_3
        addsd     %xmm6, %xmm1                  //        + pols (low lane)
        unpckhpd  %xmm6, %xmm6
        addsd     %xmm6, %xmm1                  //        + pols (high lane)
        addsd     %xmm1, %xmm4                  // result = res_hi + res_lo
        movsd     %xmm4, (%esp)
        fldl      (%esp)                        // return value goes through memory into st(0)
        jmp       .L_2TAG_PACKET_1.0.2

.L_2TAG_PACKET_0.0.2:
        // Flags still come from the cmpl above: signed-greater means |x| is
        // above the main-path range; otherwise |x| < 2^-252.
        jg        .L_2TAG_PACKET_2.0.2
        pextrw    $3, %xmm0, %eax
        andl      $32767, %eax
        pinsrw    $3, %eax, %xmm0               // clear the sign bit: xmm0 = |x|
        movsd     2192(%ebx), %xmm1             // 1.0
        subsd     %xmm0, %xmm1                  // 1 - |x|
        movsd     %xmm1, (%esp)
        fldl      (%esp)
        jmp       .L_2TAG_PACKET_1.0.2

.L_2TAG_PACKET_2.0.2:
        movl      132(%esp), %eax               // high dword of x
        andl      $2146435072, %eax             // 0x7FF00000: exponent field
        cmpl      $2146435072, %eax
        je        .L_2TAG_PACKET_3.0.2          // exponent all ones: Inf or NaN
        // Finite huge argument: three stack arguments are set up for
        // __libm_sincos_huge: x, a pointer to this frame's 8(%esp) slot
        // (40(%esp) after the 32-byte adjustment), and the integer 1.
        // NOTE(review): the selector value 1 presumably requests the cosine
        // result; confirm against the definition of __libm_sincos_huge.
        subl      $32, %esp
        movsd     %xmm0, (%esp)
        lea       40(%esp), %eax
        movl      %eax, 8(%esp)
        movl      $1, %eax
        movl      %eax, 12(%esp)
        call      __libm_sincos_huge
        addl      $32, %esp
        fldl      8(%esp)                       // load the slot whose address was passed
        jmp       .L_2TAG_PACKET_1.0.2

.L_2TAG_PACKET_3.0.2:
        fldl      128(%esp)
        fmull     2208(%ebx)                    // x * (-0.0): Inf -> NaN (invalid), NaN stays NaN

.L_2TAG_PACKET_1.0.2:
        movl      56(%esp), %ebx                // restore caller's %ebx
        movl      %ebp, %esp
        popl      %ebp
        ret
..B2.3:
END(cos)
# -- End  cos

# Start file scope ASM
ALIAS_SYMBOL(cosl, cos);
# End file scope ASM

//------------------------------------------------------------------------------
// static_const_table (2256 bytes, 16-byte aligned).
//
// Bytes 0..2047: 64 entries of 32 bytes, indexed by M. Each entry is four
// doubles, each stored as a (low dword, high dword) pair. As read by cos:
//     +0  C_hl     +8  S_hi     +16  S_lo     +24  sigma
//
// Bytes 2048..2255: constants addressed by cos at fixed offsets:
//     2048 SC_1 pair   2064 SC_2 pair   2080 SC_3 pair   2096 SC_4 pair
//     2112 {P_2, P_2}  2128 P_1         2144 P_3         2160 32/pi
//     2176 SHIFTER (not referenced by the code in this file)
//     2192 1.0         2208 -0.0        2224 sign mask   2240 {0.5, 0.5}
//------------------------------------------------------------------------------
        .section .rodata, "a"
        .align 16
        .align 16
static_const_table:
        // entry 0
        .long   0, 0
        .long   0, 0
        .long   0, 0
        .long   0, 1072693248
        // entry 1
        .long   393047345, 3212032302
        .long   3156849708, 1069094822
        .long   3758096384, 3158189848
        .long   0, 1072693248
        // entry 2
        .long   18115067, 3214126342
        .long   1013556747, 1070135480
        .long   3221225472, 3160567065
        .long   0, 1072693248
        // entry 3
        .long   2476548698, 3215330282
        .long   785751814, 1070765062
        .long   2684354560, 3161838221
        .long   0, 1072693248
        // entry 4
        .long   2255197647, 3216211105
        .long   2796464483, 1071152610
        .long   3758096384, 3160878317
        .long   0, 1072693248
        // entry 5
        .long   1945768569, 3216915048
        .long   939980347, 1071524701
        .long   536870912, 1012796809
        .long   0, 1072693248
        // entry 6
        .long   1539668340, 3217396327
        .long   967731400, 1071761211
        .long   536870912, 1015752157
        .long   0, 1072693248
        // entry 7
        .long   1403757309, 3217886718
        .long   621354454, 1071926515
        .long   536870912, 1013450602
        .long   0, 1072693248
        // entry 8
        .long   2583490354, 1070236281
        .long   1719614413, 1072079006
        .long   536870912, 3163282740
        .long   0, 1071644672
        // entry 9
        .long   2485417816, 1069626316
        .long   1796544321, 1072217216
        .long   536870912, 3162686945
        .long   0, 1071644672
        // entry 10
        .long   2598800519, 1068266419
        .long   688824739, 1072339814
        .long   3758096384, 1010431536
        .long   0, 1071644672
        // entry 11
        .long   2140183630, 3214756396
        .long   4051746225, 1072445618
        .long   2147483648, 3161907377
        .long   0, 1071644672
        // entry 12
        .long   1699043957, 3216902261
        .long   3476196678, 1072533611
        .long   536870912, 1014257638
        .long   0, 1071644672
        // entry 13
        .long   1991047213, 1067753521
        .long   1455828442, 1072602945
        .long   3758096384, 1015505073
        .long   0, 1070596096
        // entry 14
        .long   240740309, 3215727903
        .long   3489094832, 1072652951
        .long   536870912, 1014325783
        .long   0, 1070596096
        // entry 15
        .long   257503056, 3214647653
        .long   2748392742, 1072683149
        .long   1073741824, 3163061750
        .long   0, 1069547520
        // entry 16
        .long   0, 0
        .long   0, 1072693248
        .long   0, 0
        .long   0, 0
        // entry 17
        .long   257503056, 1067164005
        .long   2748392742, 1072683149
        .long   1073741824, 3163061750
        .long   0, 3217031168
        // entry 18
        .long   240740309, 1068244255
        .long   3489094832, 1072652951
        .long   536870912, 1014325783
        .long   0, 3218079744
        // entry 19
        .long   1991047213, 3215237169
        .long   1455828442, 1072602945
        .long   3758096384, 1015505073
        .long   0, 3218079744
        // entry 20
        .long   1699043957, 1069418613
        .long   3476196678, 1072533611
        .long   536870912, 1014257638
        .long   0, 3219128320
        // entry 21
        .long   2140183630, 1067272748
        .long   4051746225, 1072445618
        .long   2147483648, 3161907377
        .long   0, 3219128320
        // entry 22
        .long   2598800519, 3215750067
        .long   688824739, 1072339814
        .long   3758096384, 1010431536
        .long   0, 3219128320
        // entry 23
        .long   2485417816, 3217109964
        .long   1796544321, 1072217216
        .long   536870912, 3162686945
        .long   0, 3219128320
        // entry 24
        .long   2583490354, 3217719929
        .long   1719614413, 1072079006
        .long   536870912, 3163282740
        .long   0, 3219128320
        // entry 25
        .long   1403757309, 1070403070
        .long   621354454, 1071926515
        .long   536870912, 1013450602
        .long   0, 3220176896
        // entry 26
        .long   1539668340, 1069912679
        .long   967731400, 1071761211
        .long   536870912, 1015752157
        .long   0, 3220176896
        // entry 27
        .long   1945768569, 1069431400
        .long   939980347, 1071524701
        .long   536870912, 1012796809
        .long   0, 3220176896
        // entry 28
        .long   2255197647, 1068727457
        .long   2796464483, 1071152610
        .long   3758096384, 3160878317
        .long   0, 3220176896
        // entry 29
        .long   2476548698, 1067846634
        .long   785751814, 1070765062
        .long   2684354560, 3161838221
        .long   0, 3220176896
        // entry 30
        .long   18115067, 1066642694
        .long   1013556747, 1070135480
        .long   3221225472, 3160567065
        .long   0, 3220176896
        // entry 31
        .long   393047345, 1064548654
        .long   3156849708, 1069094822
        .long   3758096384, 3158189848
        .long   0, 3220176896
        // entry 32
        .long   0, 0
        .long   0, 0
        .long   0, 0
        .long   0, 3220176896
        // entry 33
        .long   393047345, 1064548654
        .long   3156849708, 3216578470
        .long   3758096384, 1010706200
        .long   0, 3220176896
        // entry 34
        .long   18115067, 1066642694
        .long   1013556747, 3217619128
        .long   3221225472, 1013083417
        .long   0, 3220176896
        // entry 35
        .long   2476548698, 1067846634
        .long   785751814, 3218248710
        .long   2684354560, 1014354573
        .long   0, 3220176896
        // entry 36
        .long   2255197647, 1068727457
        .long   2796464483, 3218636258
        .long   3758096384, 1013394669
        .long   0, 3220176896
        // entry 37
        .long   1945768569, 1069431400
        .long   939980347, 3219008349
        .long   536870912, 3160280457
        .long   0, 3220176896
        // entry 38
        .long   1539668340, 1069912679
        .long   967731400, 3219244859
        .long   536870912, 3163235805
        .long   0, 3220176896
        // entry 39
        .long   1403757309, 1070403070
        .long   621354454, 3219410163
        .long   536870912, 3160934250
        .long   0, 3220176896
        // entry 40
        .long   2583490354, 3217719929
        .long   1719614413, 3219562654
        .long   536870912, 1015799092
        .long   0, 3219128320
        // entry 41
        .long   2485417816, 3217109964
        .long   1796544321, 3219700864
        .long   536870912, 1015203297
        .long   0, 3219128320
        // entry 42
        .long   2598800519, 3215750067
        .long   688824739, 3219823462
        .long   3758096384, 3157915184
        .long   0, 3219128320
        // entry 43
        .long   2140183630, 1067272748
        .long   4051746225, 3219929266
        .long   2147483648, 1014423729
        .long   0, 3219128320
        // entry 44
        .long   1699043957, 1069418613
        .long   3476196678, 3220017259
        .long   536870912, 3161741286
        .long   0, 3219128320
        // entry 45
        .long   1991047213, 3215237169
        .long   1455828442, 3220086593
        .long   3758096384, 3162988721
        .long   0, 3218079744
        // entry 46
        .long   240740309, 1068244255
        .long   3489094832, 3220136599
        .long   536870912, 3161809431
        .long   0, 3218079744
        // entry 47
        .long   257503056, 1067164005
        .long   2748392742, 3220166797
        .long   1073741824, 1015578102
        .long   0, 3217031168
        // entry 48
        .long   0, 0
        .long   0, 3220176896
        .long   0, 0
        .long   0, 0
        // entry 49
        .long   257503056, 3214647653
        .long   2748392742, 3220166797
        .long   1073741824, 1015578102
        .long   0, 1069547520
        // entry 50
        .long   240740309, 3215727903
        .long   3489094832, 3220136599
        .long   536870912, 3161809431
        .long   0, 1070596096
        // entry 51
        .long   1991047213, 1067753521
        .long   1455828442, 3220086593
        .long   3758096384, 3162988721
        .long   0, 1070596096
        // entry 52
        .long   1699043957, 3216902261
        .long   3476196678, 3220017259
        .long   536870912, 3161741286
        .long   0, 1071644672
        // entry 53
        .long   2140183630, 3214756396
        .long   4051746225, 3219929266
        .long   2147483648, 1014423729
        .long   0, 1071644672
        // entry 54
        .long   2598800519, 1068266419
        .long   688824739, 3219823462
        .long   3758096384, 3157915184
        .long   0, 1071644672
        // entry 55
        .long   2485417816, 1069626316
        .long   1796544321, 3219700864
        .long   536870912, 1015203297
        .long   0, 1071644672
        // entry 56
        .long   2583490354, 1070236281
        .long   1719614413, 3219562654
        .long   536870912, 1015799092
        .long   0, 1071644672
        // entry 57
        .long   1403757309, 3217886718
        .long   621354454, 3219410163
        .long   536870912, 3160934250
        .long   0, 1072693248
        // entry 58
        .long   1539668340, 3217396327
        .long   967731400, 3219244859
        .long   536870912, 3163235805
        .long   0, 1072693248
        // entry 59
        .long   1945768569, 3216915048
        .long   939980347, 3219008349
        .long   536870912, 3160280457
        .long   0, 1072693248
        // entry 60
        .long   2255197647, 3216211105
        .long   2796464483, 3218636258
        .long   3758096384, 1013394669
        .long   0, 1072693248
        // entry 61
        .long   2476548698, 3215330282
        .long   785751814, 3218248710
        .long   2684354560, 1014354573
        .long   0, 1072693248
        // entry 62
        .long   18115067, 3214126342
        .long   1013556747, 3217619128
        .long   3221225472, 1013083417
        .long   0, 1072693248
        // entry 63
        .long   393047345, 3212032302
        .long   3156849708, 3216578470
        .long   3758096384, 1010706200
        .long   0, 1072693248
        // offset 2048: SC_1 pair
        .long   1431655765, 3217380693
        .long   0, 3219128320
        // offset 2064: SC_2 pair
        .long   286331153, 1065423121
        .long   1431655765, 1067799893
        // offset 2080: SC_3 pair
        .long   436314138, 3207201184
        .long   381774871, 3210133868
        // offset 2096: SC_4 pair
        .long   2773927732, 1053236707
        .long   436314138, 1056571808
        // offset 2112: {P_2, P_2}
        .long   442499072, 1032893537
        .long   442499072, 1032893537
        // offset 2128: P_1
        .long   1413480448, 1069097467
        .long   0, 0
        // offset 2144: P_3
        .long   771977331, 996350346
        .long   0, 0
        // offset 2160: 32/pi
        .long   1841940611, 1076125488
        .long   0, 0
        // offset 2176: SHIFTER
        .long   0, 1127743488
        .long   0, 0
        // offset 2192: 1.0
        .long   0, 1072693248
        .long   0, 0
        // offset 2208: -0.0 (multiplier for the Inf/NaN path)
        .long   0, 2147483648
        .long   0, 0
        // offset 2224: sign-bit mask
        .long   0, 2147483648
        .long   0, 0
        // offset 2240: {0.5, 0.5}
        .long   0, 1071644672
        .long   0, 1071644672
        .type   static_const_table,@object
        .size   static_const_table,2256
        .data
        .hidden __libm_sincos_huge
        .section .note.GNU-stack, "",@progbits
# End