Home | Info | Community | Development | myReactOS | Contact Us
ReactOS Development > Doxygen > m_debug_util.h
Go to the documentation of this file.
/*
 * Mesa 3-D graphics library
 * Version: 6.1
 *
 * Copyright (C) 1999-2004 Brian Paul All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *    Gareth Hughes
 */

/*
 * Shared helpers for the math debugging code: optional cycle-counter
 * timing macros (INIT_COUNTER / BEGIN_RACE / END_RACE), a quantized
 * random number generator, a float precision comparison and the ALIGN16
 * declaration macro.  Everything is compiled only when DEBUG_MATH is
 * defined.
 *
 * This header does not include what it uses: LONG_MAX, rand(), RAND_MAX,
 * GLfloat, GLint and FREXPF must already be visible at the point of
 * inclusion.
 */

#ifndef __M_DEBUG_UTIL_H__
#define __M_DEBUG_UTIL_H__


#ifdef DEBUG_MATH  /* This code only used for debugging */


/* Comment this out to deactivate the cycle counter.
 * NOTE: it works only on CPUs which know the 'rdtsc' command (586 or higher)
 * (hope, you don't try to debug Mesa on a 386 ;)
 *
 * NOTE(review): this condition covers i386 and sparc only, so the
 * __x86_64__ timing macros further down are reachable only when
 * RUN_DEBUG_BENCHMARK is defined from outside this header - confirm
 * whether that is intended.
 */
#if defined(__GNUC__) && \
    ((defined(__i386__) && defined(USE_X86_ASM)) || \
     (defined(__sparc__) && defined(USE_SPARC_ASM)))
#define RUN_DEBUG_BENCHMARK
#endif

#define TEST_COUNT 128          /* size of the tested vector array */

#define REQUIRED_PRECISION 10   /* allow 4 bits to miss */
#define MAX_PRECISION 24        /* max. precision possible */


#ifdef RUN_DEBUG_BENCHMARK
/* Overhead of profiling counter in cycles.  Automatically adjusted to
 * your machine at run time - counter initialization should give very
 * consistent results.
 */
extern long counter_overhead;

/* This is the value of the environment variable MESA_PROFILE, and is
 * used to determine if we should benchmark the functions as well as
 * verify their correctness.
 */
extern char *mesa_profile;

/* Modify the number of tests if you like.
 * We take the minimum of all results, because every error should be
 * positive (time used by other processes, task switches etc).
 * It is assumed that all calculations are done in the cache.
 *
 * Usage of the macros defined below, on every architecture:
 *
 *   INIT_COUNTER()  stores a value into counter_overhead.
 *   BEGIN_RACE(x)   sets x to LONG_MAX and opens a for-loop that reads a
 *                   start timestamp on every pass.  The loop variable
 *                   'cycle_i' is NOT declared by the macro; it must be
 *                   declared in the calling scope.
 *   END_RACE(x)     reads the end timestamp, keeps the smallest
 *                   (end - start) seen so far in x, closes the loop
 *                   opened by BEGIN_RACE and finally subtracts
 *                   counter_overhead from x.
 *
 * BEGIN_RACE and END_RACE expand to several statements and an unbalanced
 * brace, so they must be used as a matched pair in the same block.
 */

#if defined(__i386__)

#if 1 /* PPro, PII, PIII version */

/* Profiling on the P6 architecture requires a little more work, due to
 * the internal out-of-order execution.  We must perform a serializing
 * 'cpuid' instruction before and after the 'rdtsc' instructions to make
 * sure no other uops are executed when we sample the timestamp counter.
 *
 * Only %eax (the low half of the counter) is stored after each 'rdtsc'.
 *
 * NOTE(review): the asm pushes %ebx before writing the "=m" outputs; if
 * the compiler addresses those operands relative to %esp the store could
 * land at a shifted offset - confirm against the build flags in use.
 */
#define INIT_COUNTER() \
   do { \
      int cycle_i; \
      counter_overhead = LONG_MAX; \
      for ( cycle_i = 0 ; cycle_i < 8 ; cycle_i++ ) { \
         long cycle_tmp1 = 0, cycle_tmp2 = 0; \
         __asm__ __volatile__ ( "push %%ebx \n" \
                                "xor %%eax, %%eax \n" \
                                "cpuid \n" \
                                "rdtsc \n" \
                                "mov %%eax, %0 \n" \
                                "xor %%eax, %%eax \n" \
                                "cpuid \n" \
                                "pop %%ebx \n" \
                                "push %%ebx \n" \
                                "xor %%eax, %%eax \n" \
                                "cpuid \n" \
                                "rdtsc \n" \
                                "mov %%eax, %1 \n" \
                                "xor %%eax, %%eax \n" \
                                "cpuid \n" \
                                "pop %%ebx \n" \
                                : "=m" (cycle_tmp1), "=m" (cycle_tmp2) \
                                : : "eax", "ecx", "edx" ); \
         if ( counter_overhead > (cycle_tmp2 - cycle_tmp1) ) { \
            counter_overhead = cycle_tmp2 - cycle_tmp1; \
         } \
      } \
   } while (0)

#define BEGIN_RACE(x) \
   x = LONG_MAX; \
   for ( cycle_i = 0 ; cycle_i < 10 ; cycle_i++ ) { \
      long cycle_tmp1 = 0, cycle_tmp2 = 0; \
      __asm__ __volatile__ ( "push %%ebx \n" \
                             "xor %%eax, %%eax \n" \
                             "cpuid \n" \
                             "rdtsc \n" \
                             "mov %%eax, %0 \n" \
                             "xor %%eax, %%eax \n" \
                             "cpuid \n" \
                             "pop %%ebx \n" \
                             : "=m" (cycle_tmp1) \
                             : : "eax", "ecx", "edx" );

#define END_RACE(x) \
      __asm__ __volatile__ ( "push %%ebx \n" \
                             "xor %%eax, %%eax \n" \
                             "cpuid \n" \
                             "rdtsc \n" \
                             "mov %%eax, %0 \n" \
                             "xor %%eax, %%eax \n" \
                             "cpuid \n" \
                             "pop %%ebx \n" \
                             : "=m" (cycle_tmp2) \
                             : : "eax", "ecx", "edx" ); \
      if ( x > (cycle_tmp2 - cycle_tmp1) ) { \
         x = cycle_tmp2 - cycle_tmp1; \
      } \
   } \
   x -= counter_overhead;

#else /* PPlain, PMMX version */

/* To ensure accurate results, we stall the pipelines with the
 * non-pairable 'cdq' instruction.  This ensures all the code being
 * profiled is complete when the 'rdtsc' instruction executes.
 *
 * NOTE(review): unlike every other variant in this file, this
 * INIT_COUNTER takes the destination as a parameter (x) instead of
 * writing counter_overhead directly; a caller written for the other
 * variants would need INIT_COUNTER(counter_overhead) here - confirm
 * before enabling this branch.
 */
#define INIT_COUNTER(x) \
   do { \
      int cycle_i; \
      x = LONG_MAX; \
      for ( cycle_i = 0 ; cycle_i < 32 ; cycle_i++ ) { \
         long cycle_tmp1, cycle_tmp2, dummy; \
         __asm__ ( "mov %%eax, %0" : "=a" (cycle_tmp1) ); \
         __asm__ ( "mov %%eax, %0" : "=a" (cycle_tmp2) ); \
         __asm__ ( "cdq" ); \
         __asm__ ( "cdq" ); \
         __asm__ ( "rdtsc" : "=a" (cycle_tmp1), "=d" (dummy) ); \
         __asm__ ( "cdq" ); \
         __asm__ ( "cdq" ); \
         __asm__ ( "rdtsc" : "=a" (cycle_tmp2), "=d" (dummy) ); \
         if ( x > (cycle_tmp2 - cycle_tmp1) ) \
            x = cycle_tmp2 - cycle_tmp1; \
      } \
   } while (0)

#define BEGIN_RACE(x) \
   x = LONG_MAX; \
   for ( cycle_i = 0 ; cycle_i < 16 ; cycle_i++ ) { \
      long cycle_tmp1, cycle_tmp2, dummy; \
      __asm__ ( "mov %%eax, %0" : "=a" (cycle_tmp1) ); \
      __asm__ ( "mov %%eax, %0" : "=a" (cycle_tmp2) ); \
      __asm__ ( "cdq" ); \
      __asm__ ( "cdq" ); \
      __asm__ ( "rdtsc" : "=a" (cycle_tmp1), "=d" (dummy) );


#define END_RACE(x) \
      __asm__ ( "cdq" ); \
      __asm__ ( "cdq" ); \
      __asm__ ( "rdtsc" : "=a" (cycle_tmp2), "=d" (dummy) ); \
      if ( x > (cycle_tmp2 - cycle_tmp1) ) \
         x = cycle_tmp2 - cycle_tmp1; \
   } \
   x -= counter_overhead;

#endif

#elif defined(__x86_64__)

/* Read the timestamp counter with 'rdtsc' and combine the two 32-bit
 * halves (low in a, high in d) into the unsigned long lvalue 'val'.
 */
#define rdtscll(val) do { \
     unsigned int a,d; \
     __asm__ volatile("rdtsc" : "=a" (a), "=d" (d)); \
     (val) = ((unsigned long)a) | (((unsigned long)d)<<32); \
} while(0)

/* Copied from i386 PIII version */
#define INIT_COUNTER() \
   do { \
      int cycle_i; \
      counter_overhead = LONG_MAX; \
      for ( cycle_i = 0 ; cycle_i < 16 ; cycle_i++ ) { \
         unsigned long cycle_tmp1, cycle_tmp2; \
         rdtscll(cycle_tmp1); \
         rdtscll(cycle_tmp2); \
         if ( counter_overhead > (cycle_tmp2 - cycle_tmp1) ) { \
            counter_overhead = cycle_tmp2 - cycle_tmp1; \
         } \
      } \
   } while (0)


#define BEGIN_RACE(x) \
   x = LONG_MAX; \
   for ( cycle_i = 0 ; cycle_i < 10 ; cycle_i++ ) { \
      unsigned long cycle_tmp1, cycle_tmp2; \
      rdtscll(cycle_tmp1);

#define END_RACE(x) \
      rdtscll(cycle_tmp2); \
      if ( x > (cycle_tmp2 - cycle_tmp1) ) { \
         x = cycle_tmp2 - cycle_tmp1; \
      } \
   } \
   x -= counter_overhead;

#elif defined(__sparc__)

/* On sparc the overhead is a fixed constant rather than measured. */
#define INIT_COUNTER() \
   do { counter_overhead = 5; } while(0)

#define BEGIN_RACE(x) \
x = LONG_MAX; \
for (cycle_i = 0; cycle_i <10; cycle_i++) { \
   register long cycle_tmp1 asm("l0"); \
   register long cycle_tmp2 asm("l1"); \
   /* rd %tick, %l0 */ \
   __asm__ __volatile__ (".word 0xa1410000" : "=r" (cycle_tmp1)); /* save timestamp */

#define END_RACE(x) \
   /* rd %tick, %l1 */ \
   __asm__ __volatile__ (".word 0xa3410000" : "=r" (cycle_tmp2)); \
   if (x > (cycle_tmp2-cycle_tmp1)) x = cycle_tmp2 - cycle_tmp1; \
} \
x -= counter_overhead;

#else
#error Your processor is not supported for RUN_DEBUG_BENCHMARK
#endif

#else

/* Benchmarking disabled: the race macros expand to nothing, so the code
 * placed between them runs exactly once and x is left untouched.
 */
#define BEGIN_RACE(x)
#define END_RACE(x)

#endif


/* =============================================================
 * Helper functions
 */

/* Return a pseudo-random value in the range [-1, 1].
 *
 * rand() is scaled to [0, 1], truncated to a multiple of 1/8192 (1 << 13
 * steps) and then mapped linearly onto [-1, 1].
 */
static GLfloat rnd( void )
{
   GLfloat f = (GLfloat)rand() / (GLfloat)RAND_MAX;
   GLfloat gran = (GLfloat)(1 << 13);

   /* Quantize: drop everything below 1/gran. */
   f = (GLfloat)(GLint)(f * gran) / gran;

   return f * 2.0 - 1.0;
}

/* Estimate how closely two floats agree.
 *
 * Returns MAX_PRECISION when a and b are exactly equal, 0 when exactly
 * one of them is zero, and otherwise the smaller of the exponents of a
 * and b minus the exponent of their difference (a - b).
 *
 * FREXPF is defined outside this header; presumably it stores the binary
 * exponent of its first argument the way frexpf() does, which would make
 * the result an approximate count of matching leading significand bits -
 * confirm against its definition.
 */
static int significand_match( GLfloat a, GLfloat b )
{
   GLfloat d = a - b;
   int a_ex, b_ex, d_ex;

   if ( d == 0.0F ) {
      return MAX_PRECISION;   /* Exact match */
   }

   if ( a == 0.0F || b == 0.0F ) {
      /* It would probably be better to check if the
       * non-zero number is denormalized and return
       * the index of the highest set bit here.
       */
      return 0;
   }

   FREXPF( a, &a_ex );
   FREXPF( b, &b_ex );
   FREXPF( d, &d_ex );

   if ( a_ex < b_ex ) {
      return a_ex - d_ex;
   } else {
      return b_ex - d_ex;
   }
}

/* Symbolic constants; they are not used inside this header. */
enum { NIL = 0, ONE = 1, NEG = -1, VAR = 2 };

/* Ensure our arrays are correctly aligned.
 *
 * ALIGN16(type, array) declares 'array' with the given 'type' (which may
 * include a storage class), requesting 16-byte alignment where the
 * compiler offers a way to ask for it.  Every branch is function-like and
 * always emits the declaration, so the same call site compiles
 * everywhere; compilers without alignment support simply get an
 * unaligned object.
 */
#if defined(__GNUC__)
#  define ALIGN16(type, array)   type array __attribute__ ((aligned (16)))
#elif defined(_MSC_VER)
   /* __declspec must come at the start of the declaration. */
#  define ALIGN16(type, array)   __declspec(align(16)) type array
#elif defined(__WATCOMC__)
   /* Watcom does not support this: declare the object without alignment. */
#  define ALIGN16(type, array)   type array
#elif defined(__xlC__)
#  define ALIGN16(type, array)   type __align (16) array
#else
#  warning "ALIGN16 will not 16-byte align!\n"
#  define ALIGN16(type, array)   type array
#endif


#endif /* DEBUG_MATH */

#endif /* __M_DEBUG_UTIL_H__ */

/* Generated on Fri May 25 2012 04:18:42 for ReactOS by */
1.7.6.1
|