CrystalSpace

Public API Reference

Main Page | Modules | Class Hierarchy | Alphabetical List | Class List | File List | Class Members | File Members | Related Pages

csqsqrt.h

Go to the documentation of this file.
00001 /*
00002     Copyright (C) 2000 by Andrew Zabolotny (Intel version)
00003     Copyright (C) 2002 by Matthew Reda <reda@mac.com> (PowerPC version)
00004     Fast computation of sqrt(x) and 1/sqrt(x)
00005   
00006     This library is free software; you can redistribute it and/or
00007     modify it under the terms of the GNU Library General Public
00008     License as published by the Free Software Foundation; either
00009     version 2 of the License, or (at your option) any later version.
00010   
00011     This library is distributed in the hope that it will be useful,
00012     but WITHOUT ANY WARRANTY; without even the implied warranty of
00013     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00014     Library General Public License for more details.
00015   
00016     You should have received a copy of the GNU Library General Public
00017     License along with this library; if not, write to the Free
00018     Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
00019 */
00020 
00029 #ifndef __CS_CSQSQRT_H__
00030 #define __CS_CSQSQRT_H__
00031 
00039 static inline float csQsqrt (float x);
00040 
00048 static inline float csQisqrt (float x);
00049 
00052 #if (!defined (CS_NO_QSQRT)) && defined (CS_PROCESSOR_X86) && defined (CS_COMPILER_GCC)
00053 
00071 static inline float csQsqrt (float x)
00072 {
00073   float ret;
00074   static float c1=0.5f;
00075   static float c2=1.5f;
00076 
00077 
00078 // Original C++ formulae:
00079 // float tmp = x;
00080 // *((unsigned *)&tmp) = (0xbe6f0000 - *((unsigned *)&tmp)) >> 1;
00081 // double h = x * 0.5;
00082 // double a = tmp;
00083 // a *= 1.5 - a * a * h;
00084 // a *= 1.5 - a * a * h;
00085 // return a * x;
00086 
00087   // Use __volatile__ so that the compiler will not mess with this
00088   // code. Under some versions of gcc in combination with -O2 optimize
00089   // mode not using __volatile__ can cause errors.
00090   __asm__ __volatile__ (
00091                 "flds   %1\n"                   // x
00092                 "movl   $0xbe6f0000,%%eax\n"
00093                 "subl   %1,%%eax\n"
00094                 "shrl   $1,%%eax\n"
00095                 "movl   %%eax,%1\n"
00096                 "flds   %2\n"                   // x 0.5
00097                 "fmul   %%st(1)\n"              // x h
00098                 "flds   %3\n"                   // x h 1.5
00099                 "flds   %1\n"                   // x h 1.5 a
00100                 "fld    %%st\n"                 // x h 1.5 a a
00101                 "fmul   %%st\n"                 // x h 1.5 a a*a
00102                 "fmul   %%st(3)\n"              // x h 1.5 a a*a*h
00103                 "fsubr  %%st(2)\n"              // x h 1.5 a 1.5-a*a*h
00104                 "fmulp  %%st(1)\n"              // x h 1.5 a
00105                 "fld    %%st\n"                 // x h 1.5 a a
00106                 "fmul   %%st\n"                 // x h 1.5 a a*a
00107                 "fmulp  %%st(3)\n"              // x a*a*h 1.5 a
00108                 "fxch\n"                        // x a*a*h a 1.5
00109                 "fsubp  %%st,%%st(2)\n"         // x 1.5-a*a*h a
00110                 "fmulp  %%st(1)\n"              // x a
00111                 "fmulp  %%st(1)\n"              // a
00112         : "=&t" (ret), "+m" (x) : "m" (c1), "m" (c2)
00113         : "eax", "st(1)", "st(2)", "st(3)", "st(4)", "st(5)", "st(6)", "st(7)"
00114   );
00115   return ret;
00116 }
00117 
00125 static inline float csQisqrt (float x)
00126 {
00127   float ret;
00128   static float c1=0.5f;
00129   static float c2=1.5f;
00130   // Use __volatile__ so that the compiler will not mess with this
00131   // code. Under some versions of gcc in combination with -O2 optimize
00132   // mode not using __volatile__ can cause errors.
00133   __asm__ __volatile__ (
00134                 "flds   %1\n"                   // x
00135                 "movl   $0xbe6f0000,%%eax\n"
00136                 "subl   %1,%%eax\n"
00137                 "shrl   $1,%%eax\n"
00138                 "movl   %%eax,%1\n"
00139                 "flds   %2\n"                   // x 0.5
00140                 "fmulp  %%st(1)\n"              // h
00141                 "flds   %3\n"                   // h 1.5
00142                 "flds   %1\n"                   // h 1.5 a
00143                 "fld    %%st\n"                 // h 1.5 a a
00144                 "fmul   %%st\n"                 // h 1.5 a a*a
00145                 "fmul   %%st(3)\n"              // h 1.5 a a*a*h
00146                 "fsubr  %%st(2)\n"              // h 1.5 a 1.5-a*a*h
00147                 "fmulp  %%st(1)\n"              // h 1.5 a
00148                 "fld    %%st\n"                 // h 1.5 a a
00149                 "fmul   %%st\n"                 // h 1.5 a a*a
00150                 "fmulp  %%st(3)\n"              // a*a*h 1.5 a
00151                 "fxch\n"                        // a*a*h a 1.5
00152                 "fsubp  %%st,%%st(2)\n"         // 1.5-a*a*h a
00153                 "fmulp  %%st(1)\n"              // a
00154         : "=t" (ret), "+m" (x) : "m" (c1), "m" (c2)
00155         : "eax", "st(1)", "st(2)", "st(3)", "st(4)", "st(5)", "st(6)", "st(7)"
00156   );
00157   return ret;
00158 }
00159 
00160 #elif (!defined (CS_NO_QSQRT)) && defined (CS_PROCESSOR_POWERPC) && defined (CS_COMPILER_GCC)
00161 
00169 static inline float csQsqrt(float x)
00170 {
00171   float y0 = 0.0;
00172 
00173   if (x != 0.0)
00174   {
00175     float x0 = x * 0.5f;
00176 
00177     __asm__ __volatile__ ("frsqrte %0,%1" : "=f" (y0) : "f" (x));
00178     
00179     y0 = y0 * (1.5f - x0 * y0 * y0);
00180     y0 = (y0 * (1.5f - x0 * y0 * y0)) * x;
00181   };
00182     
00183   return y0;
00184 };
00185 
00190 static inline float csQisqrt(float x)
00191 {
00192   float x0 = x * 0.5f;
00193   float y0;
00194   __asm__ __volatile__ ("frsqrte %0,%1" : "=f" (y0) : "f" (x));
00195     
00196   y0 = y0 * (1.5f - x0 * y0 * y0);
00197   y0 = y0 * (1.5f - x0 * y0 * y0);
00198 
00199   return y0;
00200 };
00201 
00202 #elif (!defined (CS_NO_QSQRT)) && defined (CS_PROCESSOR_X86) && defined (CS_COMPILER_MSVC)
00203 
00204 #include <math.h>
00205 static inline float csQsqrt (float x) { return sqrtf(x); }
00206 static inline float csQisqrt(float x) { return 1.0f / sqrtf(x); }
00207 
00208 #else
00209 
00210 #include <math.h>
00211 static inline float csQsqrt (float x) { return (float)sqrt(x); }
00212 static inline float csQisqrt(float x) { return (float)(1.0 / sqrt(x)); }
00213 
00214 #endif
00215 
00216 #endif // __CS_CSQSQRT_H__

Generated for Crystal Space by doxygen 1.3.9.1