00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029 #include "pipe/p_config.h"
00030
00031
00032 #include "pipe/p_shader_tokens.h"
00033 #include "util/u_math.h"
00034 #include "util/u_memory.h"
00035 #include "tgsi/tgsi_parse.h"
00036 #include "tgsi/tgsi_util.h"
00037 #include "tgsi/tgsi_exec.h"
00038 #include "draw_vs.h"
00039 #include "draw_vs_aos.h"
00040 #include "draw_vertex.h"
00041
00042 #ifdef PIPE_ARCH_X86
00043
00044 #include "rtasm/rtasm_x86sse.h"
00045
00046
/*
 * Bit layout used to build the x87 control words stored in
 * aos_machine::fpu_rnd_nearest and aos_machine::fpu_rnd_neg_inf.
 */

/* Exception bits, one per exception class (bits 0-5). */
#define X87_CW_EXCEPTION_INV_OP       (1 << 0)
#define X87_CW_EXCEPTION_DENORM_OP    (1 << 1)
#define X87_CW_EXCEPTION_ZERO_DIVIDE  (1 << 2)
#define X87_CW_EXCEPTION_OVERFLOW     (1 << 3)
#define X87_CW_EXCEPTION_UNDERFLOW    (1 << 4)
#define X87_CW_EXCEPTION_PRECISION    (1 << 5)

/* Precision field (bits 8-9). */
#define X87_CW_PRECISION_SINGLE       (0 << 8)
#define X87_CW_PRECISION_RESERVED     (1 << 8)
#define X87_CW_PRECISION_DOUBLE       (2 << 8)
#define X87_CW_PRECISION_DOUBLE_EXT   (3 << 8)
#define X87_CW_PRECISION_MASK         (3 << 8)

/* Rounding field (bits 10-11). */
#define X87_CW_ROUND_NEAREST          (0 << 10)
#define X87_CW_ROUND_DOWN             (1 << 10)
#define X87_CW_ROUND_UP               (2 << 10)
#define X87_CW_ROUND_ZERO             (3 << 10)
#define X87_CW_ROUND_MASK             (3 << 10)

/* Infinity bit (bit 12). */
#define X87_CW_INFINITY               (1 << 12)
00064
00065
00066 void PIPE_CDECL aos_do_lit( struct aos_machine *machine,
00067 float *result,
00068 const float *in,
00069 unsigned count )
00070 {
00071 if (in[0] > 0)
00072 {
00073 if (in[1] <= 0.0)
00074 {
00075 result[0] = 1.0F;
00076 result[1] = in[0];
00077 result[2] = 0.0F;
00078 result[3] = 1.0F;
00079 }
00080 else
00081 {
00082 const float epsilon = 1.0F / 256.0F;
00083 float exponent = CLAMP(in[3], -(128.0F - epsilon), (128.0F - epsilon));
00084 result[0] = 1.0F;
00085 result[1] = in[0];
00086 result[2] = powf(in[1], exponent);
00087 result[3] = 1.0;
00088 }
00089 }
00090 else
00091 {
00092 result[0] = 1.0F;
00093 result[1] = 0.0;
00094 result[2] = 0.0;
00095 result[3] = 1.0F;
00096 }
00097 }
00098
00099
00100 static void PIPE_CDECL do_lit_lut( struct aos_machine *machine,
00101 float *result,
00102 const float *in,
00103 unsigned count )
00104 {
00105 if (in[0] > 0)
00106 {
00107 if (in[1] <= 0.0)
00108 {
00109 result[0] = 1.0F;
00110 result[1] = in[0];
00111 result[2] = 0.0F;
00112 result[3] = 1.0F;
00113 return;
00114 }
00115
00116 if (machine->lit_info[count].shine_tab->exponent != in[3]) {
00117 machine->lit_info[count].func = aos_do_lit;
00118 goto no_luck;
00119 }
00120
00121 if (in[1] <= 1.0)
00122 {
00123 const float *tab = machine->lit_info[count].shine_tab->values;
00124 float f = in[1] * 256;
00125 int k = (int)f;
00126 float frac = f - (float)k;
00127
00128 result[0] = 1.0F;
00129 result[1] = in[0];
00130 result[2] = tab[k] + frac*(tab[k+1]-tab[k]);
00131 result[3] = 1.0;
00132 return;
00133 }
00134
00135 no_luck:
00136 {
00137 const float epsilon = 1.0F / 256.0F;
00138 float exponent = CLAMP(in[3], -(128.0F - epsilon), (128.0F - epsilon));
00139 result[0] = 1.0F;
00140 result[1] = in[0];
00141 result[2] = powf(in[1], exponent);
00142 result[3] = 1.0;
00143 }
00144 }
00145 else
00146 {
00147 result[0] = 1.0F;
00148 result[1] = 0.0;
00149 result[2] = 0.0;
00150 result[3] = 1.0F;
00151 }
00152 }
00153
00154
00155 static void do_populate_lut( struct shine_tab *tab,
00156 float unclamped_exponent )
00157 {
00158 const float epsilon = 1.0F / 256.0F;
00159 float exponent = CLAMP(unclamped_exponent, -(128.0F - epsilon), (128.0F - epsilon));
00160 unsigned i;
00161
00162 tab->exponent = unclamped_exponent;
00163
00164 tab->values[0] = 0;
00165 if (exponent == 0) {
00166 for (i = 1; i < 258; i++) {
00167 tab->values[i] = 1.0;
00168 }
00169 }
00170 else {
00171 for (i = 1; i < 258; i++) {
00172 tab->values[i] = powf((float)i * epsilon, exponent);
00173 }
00174 }
00175 }
00176
00177
00178
00179
00180 static void PIPE_CDECL populate_lut( struct aos_machine *machine,
00181 float *result,
00182 const float *in,
00183 unsigned count )
00184 {
00185 unsigned i, tab;
00186
00187
00188
00189
00190
00191 for (tab = 0; tab < 4; tab++) {
00192 if (machine->shine_tab[tab].exponent == in[3]) {
00193 goto found;
00194 }
00195 }
00196
00197 for (tab = 0, i = 1; i < 4; i++) {
00198 if (machine->shine_tab[i].last_used < machine->shine_tab[tab].last_used)
00199 tab = i;
00200 }
00201
00202 if (machine->shine_tab[tab].last_used == machine->now) {
00203
00204
00205
00206 machine->lit_info[count].func = aos_do_lit;
00207 machine->lit_info[count].func( machine, result, in, count );
00208 return;
00209 }
00210 else {
00211 do_populate_lut( &machine->shine_tab[tab], in[3] );
00212 }
00213
00214 found:
00215 machine->shine_tab[tab].last_used = machine->now;
00216 machine->lit_info[count].shine_tab = &machine->shine_tab[tab];
00217 machine->lit_info[count].func = do_lit_lut;
00218 machine->lit_info[count].func( machine, result, in, count );
00219 }
00220
00221
00222 void draw_vs_aos_machine_constants( struct aos_machine *machine,
00223 const float (*constants)[4] )
00224 {
00225 machine->constants = constants;
00226
00227 {
00228 unsigned i;
00229 for (i = 0; i < MAX_LIT_INFO; i++) {
00230 machine->lit_info[i].func = populate_lut;
00231 machine->now++;
00232 }
00233 }
00234 }
00235
00236
00237 void draw_vs_aos_machine_viewport( struct aos_machine *machine,
00238 const struct pipe_viewport_state *viewport )
00239 {
00240 memcpy(machine->scale, viewport->scale, 4 * sizeof(float));
00241 memcpy(machine->translate, viewport->translate, 4 * sizeof(float));
00242 }
00243
00244
00245
/**
 * Release a machine obtained from draw_vs_aos_machine().
 *
 * \param machine  machine to free; NULL is accepted and ignored
 */
void draw_vs_aos_machine_destroy( struct aos_machine *machine )
{
   /* draw_vs_aos_machine() returns NULL when allocation fails, and
    * whether align_free() tolerates NULL is not visible from this file,
    * so guard here.
    */
   if (!machine)
      return;

   align_free(machine);
}
00250
00251 struct aos_machine *draw_vs_aos_machine( void )
00252 {
00253 struct aos_machine *machine;
00254 unsigned i;
00255 float inv = 1.0f/255.0f;
00256 float f255 = 255.0f;
00257
00258 machine = align_malloc(sizeof(struct aos_machine), 16);
00259 if (!machine)
00260 return NULL;
00261
00262 memset(machine, 0, sizeof(*machine));
00263
00264 ASSIGN_4V(machine->internal[IMM_SWZ], 1.0f, -1.0f, 0.0f, 1.0f);
00265 *(unsigned *)&machine->internal[IMM_SWZ][3] = 0xffffffff;
00266
00267 ASSIGN_4V(machine->internal[IMM_ONES], 1.0f, 1.0f, 1.0f, 1.0f);
00268 ASSIGN_4V(machine->internal[IMM_NEGS], -1.0f, -1.0f, -1.0f, -1.0f);
00269 ASSIGN_4V(machine->internal[IMM_IDENTITY], 0.0f, 0.0f, 0.0f, 1.0f);
00270 ASSIGN_4V(machine->internal[IMM_INV_255], inv, inv, inv, inv);
00271 ASSIGN_4V(machine->internal[IMM_255], f255, f255, f255, f255);
00272 ASSIGN_4V(machine->internal[IMM_RSQ], -.5f, 1.5f, 0.0f, 0.0f);
00273
00274
00275 machine->fpu_rnd_nearest = (X87_CW_EXCEPTION_INV_OP |
00276 X87_CW_EXCEPTION_DENORM_OP |
00277 X87_CW_EXCEPTION_ZERO_DIVIDE |
00278 X87_CW_EXCEPTION_OVERFLOW |
00279 X87_CW_EXCEPTION_UNDERFLOW |
00280 X87_CW_EXCEPTION_PRECISION |
00281 (1<<6) |
00282 X87_CW_ROUND_NEAREST |
00283 X87_CW_PRECISION_DOUBLE_EXT);
00284
00285 assert(machine->fpu_rnd_nearest == 0x37f);
00286
00287 machine->fpu_rnd_neg_inf = (X87_CW_EXCEPTION_INV_OP |
00288 X87_CW_EXCEPTION_DENORM_OP |
00289 X87_CW_EXCEPTION_ZERO_DIVIDE |
00290 X87_CW_EXCEPTION_OVERFLOW |
00291 X87_CW_EXCEPTION_UNDERFLOW |
00292 X87_CW_EXCEPTION_PRECISION |
00293 (1<<6) |
00294 X87_CW_ROUND_DOWN |
00295 X87_CW_PRECISION_DOUBLE_EXT);
00296
00297 for (i = 0; i < MAX_SHINE_TAB; i++)
00298 do_populate_lut( &machine->shine_tab[i], 1.0f );
00299
00300 return machine;
00301 }
00302
00303 #else
00304
/**
 * Stub used when PIPE_ARCH_X86 is not defined: does nothing.
 */
void draw_vs_aos_machine_viewport( struct aos_machine *machine,
                                   const struct pipe_viewport_state *viewport )
{
   (void) machine;
   (void) viewport;
}
00309
/**
 * Stub used when PIPE_ARCH_X86 is not defined: does nothing.
 */
void draw_vs_aos_machine_constants( struct aos_machine *machine,
                                    const float (*constants)[4] )
{
   (void) machine;
   (void) constants;
}
00314
/**
 * Stub used when PIPE_ARCH_X86 is not defined: does nothing.
 */
void draw_vs_aos_machine_destroy( struct aos_machine *machine )
{
   (void) machine;
}
00318
/**
 * Stub used when PIPE_ARCH_X86 is not defined.
 *
 * \return always NULL; no machine is created in this configuration.
 */
struct aos_machine *draw_vs_aos_machine( void )
{
   struct aos_machine *none = NULL;

   return none;
}
00323 #endif
00324