What is the most efficient general sin(x) algorithm, and how efficient is it?
An answer to this question on Stack Overflow.
Question
While experimenting with series involving factorials, I discovered that a polynomial expansion of a factorial and a polynomial expansion of sin are very similar.
After writing a small program to generate the coefficients of the factorial's polynomial expansion (which is rather pointless on its own, since it has to calculate factorials in order to generate a factorial's expansion :P), I was able to add a multiplication by Pi and create an O(n^2) algorithm for sin(x). I wanted to compare it to other algorithms for calculating sin.
I looked online, but I couldn't find much about sin calculations. I found CORDIC, but couldn't find anything about efficient implementations, so eventually I gave up and came to ask here as it's a good question for anyone interested in the calculations of trigonometric functions in computers.
So what's the typical and/or best way to find sin(x) in software and how does my algorithm stand up?
Answer
The best way of Googling this is to figure out how it's done by some popular language, such as by searching for std::sin implementation, which would give you the C++ implementation.
What follows is one of the implementations used by the GCC standard mathematical library (drawn from here).
/*******************************************************************/
/* An ultimate sin routine. Given an IEEE double machine number x */
/* it computes the correctly rounded (to nearest) value of sin(x) */
/*******************************************************************/
#ifndef IN_SINCOS
/* sin(x) for an IEEE double X.

   The high word of X with its sign bit cleared selects one of six paths:
   tiny arguments return X itself, two mid-range intervals go straight to
   do_sin / do_cos, two larger intervals first reduce the argument
   (reduce_sincos or __branred) and then call do_sincos, and anything at or
   above 0x7ff00000 (the non-finite encodings of an IEEE double) yields
   X / X.  The result is returned through a single exit so that whatever
   SET_RESTORE_ROUND_53BIT set up stays in effect for the whole computation
   (the macro is defined elsewhere; presumably it pins round-to-nearest for
   the duration of the call -- TODO confirm).  */
double
SECTION
__sin (double x)
{
  mynumber bits;
  double result = 0;

  SET_RESTORE_ROUND_53BIT (FE_TONEAREST);

  bits.x = x;
  const int4 hi = bits.i[HIGH_HALF];
  const int4 abs_hi = hi & 0x7fffffff;	/* High word without the sign.  */

  if (abs_hi < 0x3e500000)
    {
      /* |x| < 2^-26: x -> 0, so sin(x) = x.  */
      math_check_force_underflow (x);
      result = x;
    }
  else if (abs_hi < 0x3feb6000)
    {
      /* 2^-26 < |x| < 0.855469.  Max ULP is 0.548.  */
      result = do_sin (x, 0);
    }
  else if (abs_hi < 0x400368fd)
    {
      /* 0.855469 < |x| < 2.426265.  Max ULP is 0.51.
	 Evaluated as a cosine of hp0 - |x| with hp1 as the low-order part
	 (hp0/hp1 are defined elsewhere; presumably a two-piece pi/2 --
	 TODO confirm); the sign of x is reattached afterwards.  */
      const double t = hp0 - fabs (x);
      result = copysign (do_cos (t, hp1), x);
    }
  else if (abs_hi < 0x419921FB)
    {
      /* 2.426265 < |x| < 105414350: reduce, then dispatch on n.  */
      double a, da;
      const int4 n = reduce_sincos (x, &a, &da);
      result = do_sincos (a, da, n);
    }
  else if (abs_hi < 0x7ff00000)
    {
      /* 105414350 < |x| < 2^1024: same, using __branred to reduce.  */
      double a, da;
      const int4 n = __branred (x, &a, &da);
      result = do_sincos (a, da, n);
    }
  else
    {
      /* High word at or above 0x7ff00000.  EDOM is reported only when the
	 high word is exactly 0x7ff00000 and the low word is zero.  */
      if (abs_hi == 0x7ff00000 && bits.i[LOW_HALF] == 0)
	{
	  __set_errno (EDOM);
	}
      result = x / x;
    }

  return result;
}
This calls `do_sin`:
/* Given a number partitioned into X and DX, this function computes the sine of
the number by combining the sin and cos of X (as computed by a variation of
the Taylor series) with the values looked up from the sin/cos table to get
the result. */
static inline double
__always_inline
do_sin (double x, double dx)
{
double xold = x;
/* Max ULP is 0.501 if |x| < 0.126, otherwise ULP is 0.518. */
if (fabs (x) < 0.126)
return TAYLOR_SIN (x * x, x, dx);
mynumber u;
if (x <= 0)
dx = -dx;
u.x = big + fabs (x);
x = fabs (x) - (u.x - big);
double xx, s, sn, ssn, c, cs, ccs, cor;
xx = x * x;
s = x + (dx + x * xx * (sn3 + xx * sn5));
c = x * dx + xx * (cs2 + xx * (cs4 + xx * cs6));
SINCOS_TABLE_LOOKUP (u, sn, ssn, cs, ccs);
cor = (ssn + s * ccs - sn * c) + cs * s;
return copysign (sn + cor, xold);
}
and `do_cos`:
/* Given a number partitioned into X and DX, this function computes the cosine
of the number by combining the sin and cos of X (as computed by a variation
of the Taylor series) with the values looked up from the sin/cos table to
get the result. */
static inline double
__always_inline
do_cos (double x, double dx)
{
mynumber u;
if (x < 0)
dx = -dx;
u.x = big + fabs (x);
x = fabs (x) - (u.x - big) + dx;
double xx, s, sn, ssn, c, cs, ccs, cor;
xx = x * x;
s = x + x * xx * (sn3 + xx * sn5);
c = xx * (cs2 + xx * (cs4 + xx * cs6));
SINCOS_TABLE_LOOKUP (u, sn, ssn, cs, ccs);
cor = (ccs - s * ssn - cs * c) - sn * s;
return cs + cor;
}
Both helpers rely on the following table-lookup macro:
/* Fetch the four __sincostab entries selected by U.

   The LOW_HALF word of U is shifted left by two (i.e. multiplied by 4) to
   form an index K into __sincostab.x, and the four consecutive entries
   K .. K+3 are stored into SN, SSN, CS and CCS, in that order.  Judging by
   how do_sin and do_cos use them, SN and CS are presumably the sine and
   cosine at the table point and SSN and CCS their low-order corrections --
   TODO confirm against the table generator.

   This is a GNU statement expression.  It declares its own local `k', so
   none of the arguments may be an expression that mentions a caller
   variable called `k'.  SN, SSN, CS and CCS must be assignable lvalues.  */
#define SINCOS_TABLE_LOOKUP(u, sn, ssn, cs, ccs) \
({ \
int4 k = u.i[LOW_HALF] << 2; \
sn = __sincostab.x[k]; \
ssn = __sincostab.x[k + 1]; \
cs = __sincostab.x[k + 2]; \
ccs = __sincostab.x[k + 3]; \
})
So the answer to your question is that a smart implementation combines Taylor series expansions with table look-ups.
Here's another such approach, used by Sun:
/* @(#)k_sin.c 1.3 95/01/18 */
/*
* ====================================================
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
*
* Developed at SunSoft, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*/
/* __kernel_sin( x, y, iy)
* kernel sin function on [-pi/4, pi/4], pi/4 ~ 0.7854
* Input x is assumed to be bounded by ~pi/4 in magnitude.
* Input y is the tail of x.
* Input iy indicates whether y is 0. (if iy=0, y is assumed to be 0).
*
* Algorithm
* 1. Since sin(-x) = -sin(x), we need only to consider positive x.
* 2. if x < 2^-27 (hx<0x3e400000 0), return x with inexact if x!=0.
* 3. sin(x) is approximated by a polynomial of degree 13 on
* [0,pi/4]
* sin(x) ~ x + S1*x^3 + ... + S6*x^13
* where
*
* |sin(x)/x - (1 + S1*x^2 + S2*x^4 + S3*x^6 + S4*x^8 + S5*x^10 + S6*x^12)| <= 2^-58
*
* 4. sin(x+y) = sin(x) + sin'(x')*y
* ~ sin(x) + (1-x*x/2)*y
* For better accuracy, let
* r = x^3*(S2+x^2*(S3+x^2*(S4+x^2*(S5+x^2*S6))))
* then
* sin(x) = x + (S1*x^3 + (x^2*(r-y/2)+y))
*/
#include "fdlibm.h"
/*
 * Constants used by __kernel_sin.
 *
 * half is 0.5.  S1..S6 are the coefficients of the odd polynomial described
 * in the algorithm notes above: S1 multiplies x^3, S2 x^5, ... and S6 x^13.
 * The hex pair after each value is presumably the high and low 32-bit word
 * of its IEEE double encoding.
 *
 * With a standard C compiler (__STDC__ defined) they are declared
 * static const double; otherwise plain static double.
 */
#ifdef __STDC__
static const double
#else
static double
#endif
half = 5.00000000000000000000e-01, /* 0x3FE00000, 0x00000000 */
S1 = -1.66666666666666324348e-01, /* 0xBFC55555, 0x55555549 */
S2 = 8.33333333332248946124e-03, /* 0x3F811111, 0x1110F8A6 */
S3 = -1.98412698298579493134e-04, /* 0xBF2A01A0, 0x19C161D5 */
S4 = 2.75573137070700676789e-06, /* 0x3EC71DE3, 0x57B1FE7D */
S5 = -2.50507602534068634195e-08, /* 0xBE5AE5E6, 0x8A2B9CEB */
S6 = 1.58969099521155010221e-10; /* 0x3DE5D93A, 0x5ACFD57C */
#ifdef __STDC__
double __kernel_sin(double x, double y, int iy)
#else
double __kernel_sin(x, y, iy)
double x,y; int iy; /* iy=0 if y is zero */
#endif
{
double z,r,v;
int ix;
ix = __HI(x)&0x7fffffff; /* high word of x */
if(ix<0x3e400000) /* |x| < 2**-27 */
{if((int)x==0) return x;} /* generate inexact */
z = x*x;
v = z*x;
r = S2+z*(S3+z*(S4+z*(S5+z*S6)));
if(iy==0) return x+v*(S1+z*r);
else return x-((z*(half*y-v*r)-y)-v*S1);
}
Other methods include using assembly instructions so that the calculation is done in hardware (though this has occasionally led to problems).