00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029 #include "../../../include/SIMD/SIMDAlignment.h"
00030 #if EXMAT_GNUC
00031 # include <inttypes.h>
00032 #else
00033 # include <basetsd.h>
00034 typedef INT32 int32_t;
00035 typedef UINT32 uint32_t;
00036 #endif
00037
00038 #if EXMAT_ENABLE_SIMD
00039
/* Pi as a double-precision literal; the tables that need a float narrow it
 * explicitly with a (float) cast at the point of use. */
#define AM_PI (3.1415926535897932384626433832795)
00041
/*
 * ASM_NAME(name): placed after a declarator to fix the assembler-level symbol
 * name of the object.
 *
 * With EXMAT_GNUC non-zero it expands to a GCC asm label. The __asm__
 * spelling is used instead of plain asm because asm is not recognised as a
 * keyword in strict ISO C modes (e.g. -std=c11), while __asm__ always is.
 * Otherwise the macro expands to nothing.
 */
#if EXMAT_GNUC
# define ASM_NAME(name) __asm__(#name)
#else
# define ASM_NAME(name)
#endif // EXMAT_GNUC
00050
/*
 * Helpers for declaring the constant tables used in this header.
 *
 * If EXMAT_SIMD_IMP_CONSTANT is defined when this point is reached, the
 * macros below expand to definitions with initialisers; otherwise they expand
 * to the matching `extern` declarations and the value arguments are dropped.
 *
 *   _PS_CONST(Name, Val)              float   _ps_<Name>[4]     all = Val
 *   _PS_CONST4(Name, V0, V1, V2, V3)  float   _ps_<Name>[4]     = V0..V3
 *   _PS_CONST_TYPE(Name, Type, Val)   Type    _ps_<Name>[4]     all = Val
 *   _EPI32_CONST(Name, Val)           int32_t _epi32_<Name>[4]  all = Val
 *   _PI32_CONST(Name, Val)            int32_t _pi32_<Name>[4]   all = Val
 *
 * Every table is wrapped in EXMAT_TYPE_ALIGN_PREFIX/SUFFIX(16) (8 for
 * _PI32_CONST) and tagged with ASM_NAME(<symbol>). The EXMAT_TYPE_ALIGN_*
 * macros are not defined in this file; presumably they request N-byte
 * alignment -- confirm in SIMDAlignment.h.
 */
#if defined(EXMAT_SIMD_IMP_CONSTANT)
# define _PS_CONST(Name, Val) \
    EXMAT_TYPE_ALIGN_PREFIX(16) float _ps_##Name[4] ASM_NAME(_ps_##Name) EXMAT_TYPE_ALIGN_SUFFIX(16) = { Val, Val, Val, Val }
# define _PS_CONST4(Name, Val0, Val1, Val2, Val3) \
    EXMAT_TYPE_ALIGN_PREFIX(16) float _ps_##Name[4] ASM_NAME(_ps_##Name) EXMAT_TYPE_ALIGN_SUFFIX(16) = { Val0, Val1, Val2, Val3 }
# define _PS_CONST_TYPE(Name, Type, Val) \
    EXMAT_TYPE_ALIGN_PREFIX(16) Type _ps_##Name[4] ASM_NAME(_ps_##Name) EXMAT_TYPE_ALIGN_SUFFIX(16) = { Val, Val, Val, Val }
# define _EPI32_CONST(Name, Val) \
    EXMAT_TYPE_ALIGN_PREFIX(16) int32_t _epi32_##Name[4] ASM_NAME(_epi32_##Name) EXMAT_TYPE_ALIGN_SUFFIX(16) = { Val, Val, Val, Val }
# define _PI32_CONST(Name, Val) \
    EXMAT_TYPE_ALIGN_PREFIX(8) int32_t _pi32_##Name[4] ASM_NAME(_pi32_##Name) EXMAT_TYPE_ALIGN_SUFFIX(8) = { Val, Val, Val, Val }
#else
# define _PS_CONST(Name, Val) \
    EXMAT_TYPE_ALIGN_PREFIX(16) extern float _ps_##Name[4] ASM_NAME(_ps_##Name) EXMAT_TYPE_ALIGN_SUFFIX(16)
# define _PS_CONST4(Name, Val0, Val1, Val2, Val3) \
    EXMAT_TYPE_ALIGN_PREFIX(16) extern float _ps_##Name[4] ASM_NAME(_ps_##Name) EXMAT_TYPE_ALIGN_SUFFIX(16)
# define _PS_CONST_TYPE(Name, Type, Val) \
    EXMAT_TYPE_ALIGN_PREFIX(16) extern Type _ps_##Name[4] ASM_NAME(_ps_##Name) EXMAT_TYPE_ALIGN_SUFFIX(16)
# define _EPI32_CONST(Name, Val) \
    EXMAT_TYPE_ALIGN_PREFIX(16) extern int32_t _epi32_##Name[4] ASM_NAME(_epi32_##Name) EXMAT_TYPE_ALIGN_SUFFIX(16)
# define _PI32_CONST(Name, Val) \
    EXMAT_TYPE_ALIGN_PREFIX(8) extern int32_t _pi32_##Name[4] ASM_NAME(_pi32_##Name) EXMAT_TYPE_ALIGN_SUFFIX(8)
#endif

/*
 * AM_USE_PS_CONST(Name): declares `const __m128 Name`, loaded from the table
 * _ps_<Name>. The table must already be declared (for instance through one of
 * the _PS_CONST* macros above); __m128 and _mm_load_ps must be in scope where
 * the macro is used.
 *
 * The macro deliberately does not redeclare the table:
 *  - a plain `const float _ps_<Name>[4];` at block scope creates a new,
 *    uninitialised local that hides the real table;
 *  - an `extern const float` redeclaration conflicts with the non-const
 *    (and, for _PS_CONST_TYPE, possibly non-float) declarations above.
 * The (const float *) cast lets integer bit-pattern tables be loaded too.
 * _mm_load_ps needs a 16-byte aligned address, so only use this with tables
 * declared with 16-byte alignment.
 */
#define AM_USE_PS_CONST(Name) \
    const __m128 Name = _mm_load_ps((const float *)_ps_##Name)
00080
00081
00082 _PS_CONST(am_0, 0.0f);
00083 _PS_CONST(am_1, 1.0f);
00084 _PS_CONST(am_m1, -1.0f);
00085 _PS_CONST(am_0p5, 0.5f);
00086 _PS_CONST(am_1p5, 1.5f);
00087 _PS_CONST(am_3, 3.0f);
00088 _PS_CONST(am_pi, (float)AM_PI);
00089 _PS_CONST(am_pi_o_2, (float)(AM_PI / 2.0));
00090 _PS_CONST(am_2_o_pi, (float)(2.0 / AM_PI));
00091 _PS_CONST(am_pi_o_4, (float)(AM_PI / 4.0));
00092 _PS_CONST(am_4_o_pi, (float)(4.0 / AM_PI));
00093 _PS_CONST_TYPE(am_sign_mask, int32_t, 0x80000000);
00094 _PS_CONST_TYPE(am_inv_sign_mask, int32_t, ~0x80000000);
00095 _PS_CONST_TYPE(am_min_norm_pos, int32_t, 0x00800000);
00096 _PS_CONST_TYPE(am_mant_mask, int32_t, 0x7f800000);
00097 _PS_CONST_TYPE(am_inv_mant_mask, int32_t, ~0x7f800000);
00098
00099 _EPI32_CONST(1, 1);
00100 _EPI32_CONST(2, 2);
00101 _EPI32_CONST(7, 7);
00102 _EPI32_CONST(0x7f, 0x7f);
00103 _EPI32_CONST(0xff, 0xff);
00104
00105 _PI32_CONST(0x80000000, 0x80000000);
00106 #define _pi32_1 _epi32_1
00107 #define _pi32_2 _epi32_2
00108 #define _pi32_7 _epi32_7
00109 _PI32_CONST(inv0x80000000, ~0x80000000);
00110 #define _pi32_0x7f _epi32_0x7f
00111 #define _pi32_0xff _epi32_0xff
00112
00113
00114 _PS_CONST(exp_hi, 88.3762626647949f);
00115 _PS_CONST(exp_lo, -88.3762626647949f);
00116 _PS_CONST(exp_rln2, 1.4426950408889634073599f);
00117 _PS_CONST(exp_p0, 1.26177193074810590878e-4f);
00118 _PS_CONST(exp_p1, 3.02994407707441961300e-2f);
00119 _PS_CONST(exp_q0, 3.00198505138664455042e-6f);
00120 _PS_CONST(exp_q1, 2.52448340349684104192e-3f);
00121 _PS_CONST(exp_q2, 2.27265548208155028766e-1f);
00122 _PS_CONST(exp_q3, 2.00000000000000000009e0f);
00123 _PS_CONST(exp_c1, 6.93145751953125e-1f);
00124 _PS_CONST(exp_c2, 1.42860682030941723212e-6f);
00125
00126
00127 _PS_CONST(exp2_hi, 127.4999961853f);
00128 _PS_CONST(exp2_lo, -127.4999961853f);
00129 _PS_CONST(exp2_p0, 2.30933477057345225087e-2f);
00130 _PS_CONST(exp2_p1, 2.02020656693165307700e1f);
00131 _PS_CONST(exp2_p2, 1.51390680115615096133e3f);
00132 _PS_CONST(exp2_q0, 2.33184211722314911771e2f);
00133 _PS_CONST(exp2_q1, 4.36821166879210612817e3f);
00134
00135
00136 _PS_CONST(log_p0, -7.89580278884799154124e-1f);
00137 _PS_CONST(log_p1, 1.63866645699558079767e1f);
00138 _PS_CONST(log_p2, -6.41409952958715622951e1f);
00139 _PS_CONST(log_q0, -3.56722798256324312549e1f);
00140 _PS_CONST(log_q1, 3.12093766372244180303e2f);
00141 _PS_CONST(log_q2, -7.69691943550460008604e2f);
00142 _PS_CONST(log_c0, 0.693147180559945f);
00143
00144
00145 _PS_CONST(log2_c0, 1.44269504088896340735992f);
00146
00147
00148 _PS_CONST(sincos_p0, 0.15707963267948963959e1f);
00149 _PS_CONST(sincos_p1, -0.64596409750621907082e0f);
00150 _PS_CONST(sincos_p2, 0.7969262624561800806e-1f);
00151 _PS_CONST(sincos_p3, -0.468175413106023168e-2f);
00152
00153
00154 _PS_CONST(tan_p0, -1.79565251976484877988e7f);
00155 _PS_CONST(tan_p1, 1.15351664838587416140e6f);
00156 _PS_CONST(tan_p2, -1.30936939181383777646e4f);
00157 _PS_CONST(tan_q0, -5.38695755929454629881e7f);
00158 _PS_CONST(tan_q1, 2.50083801823357915839e7f);
00159 _PS_CONST(tan_q2, -1.32089234440210967447e6f);
00160 _PS_CONST(tan_q3, 1.36812963470692954678e4f);
00161 _PS_CONST(tan_poleval, 3.68935e19f);
00162
00163
00164 _PS_CONST(atan_t0, -0.91646118527267623468e-1f);
00165 _PS_CONST(atan_t1, -0.13956945682312098640e1f);
00166 _PS_CONST(atan_t2, -0.94393926122725531747e2f);
00167 _PS_CONST(atan_t3, 0.12888383034157279340e2f);
00168 _PS_CONST(atan_s0, 0.12797564625607904396e1f);
00169 _PS_CONST(atan_s1, 0.21972168858277355914e1f);
00170 _PS_CONST(atan_s2, 0.68193064729268275701e1f);
00171 _PS_CONST(atan_s3, 0.28205206687035841409e2f);
00172
00173 #if defined(EXMAT_SIMD_IMP_CONSTANT)
00174 uint32_t _sincos_masks[2] ASM_NAME(_sincos_masks) = { 0x0, ~0x0 };
00175 uint32_t _sincos_inv_masks[2] ASM_NAME(_sincos_inv_masks) = { ~0x0, 0x0 };
00176 #else
00177 extern uint32_t _sincos_masks[2] ASM_NAME(_sincos_masks);
00178 extern uint32_t _sincos_inv_masks[2] ASM_NAME(_sincos_inv_masks);
00179 #endif // EXMAT_SIMD_IMP_CONSTANT
00180
/* ASM_NAME is only a helper for the declarations in this header; remove it so
 * it does not leak into files that include this one. */
#undef ASM_NAME
00182
00183 #endif // EXMAT_ENABLE_SIMD