Gallium3D: gallium/auxiliary/draw/draw_vs_aos_machine.c

00001 /**************************************************************************
00002  * 
00003  * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
00004  * All Rights Reserved.
00005  * 
00006  * Permission is hereby granted, free of charge, to any person obtaining a
00007  * copy of this software and associated documentation files (the
00008  * "Software"), to deal in the Software without restriction, including
00009  * without limitation the rights to use, copy, modify, merge, publish,
00010  * distribute, sub license, and/or sell copies of the Software, and to
00011  * permit persons to whom the Software is furnished to do so, subject to
00012  * the following conditions:
00013  * 
00014  * The above copyright notice and this permission notice (including the
00015  * next paragraph) shall be included in all copies or substantial portions
00016  * of the Software.
00017  * 
00018  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
00019  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
00020  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
00021  * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
00022  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
00023  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
00024  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
00025  * 
00026  **************************************************************************/
00027 
00028 
00029 #include "pipe/p_config.h"
00030 
00031 
00032 #include "pipe/p_shader_tokens.h"
00033 #include "util/u_math.h"
00034 #include "util/u_memory.h"
00035 #include "tgsi/tgsi_parse.h"
00036 #include "tgsi/tgsi_util.h"
00037 #include "tgsi/tgsi_exec.h"
00038 #include "draw_vs.h"
00039 #include "draw_vs_aos.h"
00040 #include "draw_vertex.h"
00041 
00042 #ifdef PIPE_ARCH_X86
00043 
00044 #include "rtasm/rtasm_x86sse.h"
00045 
00046 
/*
 * Bit fields used to assemble the 16-bit x87 control-word values that
 * draw_vs_aos_machine() stores in the machine (fpu_rnd_nearest /
 * fpu_rnd_neg_inf).
 */

/* Bits 0-5: one flag per exception class. */
#define X87_CW_EXCEPTION_INV_OP       0x0001
#define X87_CW_EXCEPTION_DENORM_OP    0x0002
#define X87_CW_EXCEPTION_ZERO_DIVIDE  0x0004
#define X87_CW_EXCEPTION_OVERFLOW     0x0008
#define X87_CW_EXCEPTION_UNDERFLOW    0x0010
#define X87_CW_EXCEPTION_PRECISION    0x0020

/* Bits 8-9: two-bit precision field. */
#define X87_CW_PRECISION_SINGLE       0x0000
#define X87_CW_PRECISION_RESERVED     0x0100
#define X87_CW_PRECISION_DOUBLE       0x0200
#define X87_CW_PRECISION_DOUBLE_EXT   0x0300
#define X87_CW_PRECISION_MASK         0x0300

/* Bits 10-11: two-bit rounding field. */
#define X87_CW_ROUND_NEAREST          0x0000
#define X87_CW_ROUND_DOWN             0x0400
#define X87_CW_ROUND_UP               0x0800
#define X87_CW_ROUND_ZERO             0x0C00
#define X87_CW_ROUND_MASK             0x0C00

/* Bit 12. */
#define X87_CW_INFINITY               0x1000
00064 
00065 
00066 void PIPE_CDECL aos_do_lit( struct aos_machine *machine,
00067                             float *result,
00068                             const float *in,
00069                             unsigned count )
00070 {
00071    if (in[0] > 0) 
00072    {
00073       if (in[1] <= 0.0) 
00074       {
00075          result[0] = 1.0F;
00076          result[1] = in[0];
00077          result[2] = 0.0F;
00078          result[3] = 1.0F;
00079       }
00080       else
00081       {
00082          const float epsilon = 1.0F / 256.0F;    
00083          float exponent = CLAMP(in[3], -(128.0F - epsilon), (128.0F - epsilon));
00084          result[0] = 1.0F;
00085          result[1] = in[0];
00086          result[2] = powf(in[1], exponent);
00087          result[3] = 1.0;
00088       }
00089    }
00090    else 
00091    {
00092       result[0] = 1.0F;
00093       result[1] = 0.0;
00094       result[2] = 0.0;
00095       result[3] = 1.0F;
00096    }
00097 }
00098 
00099 
00100 static void PIPE_CDECL do_lit_lut( struct aos_machine *machine,
00101                                    float *result,
00102                                    const float *in,
00103                                    unsigned count )
00104 {
00105    if (in[0] > 0) 
00106    {
00107       if (in[1] <= 0.0) 
00108       {
00109          result[0] = 1.0F;
00110          result[1] = in[0];
00111          result[2] = 0.0F;
00112          result[3] = 1.0F;
00113          return;
00114       }
00115       
00116       if (machine->lit_info[count].shine_tab->exponent != in[3]) {
00117          machine->lit_info[count].func = aos_do_lit;
00118          goto no_luck;
00119       }
00120 
00121       if (in[1] <= 1.0)
00122       {
00123          const float *tab = machine->lit_info[count].shine_tab->values;
00124          float f = in[1] * 256;
00125          int k = (int)f;
00126          float frac = f - (float)k;
00127          
00128          result[0] = 1.0F;
00129          result[1] = in[0];
00130          result[2] = tab[k] + frac*(tab[k+1]-tab[k]);
00131          result[3] = 1.0;
00132          return;
00133       }
00134       
00135    no_luck:
00136       {
00137          const float epsilon = 1.0F / 256.0F;    
00138          float exponent = CLAMP(in[3], -(128.0F - epsilon), (128.0F - epsilon));
00139          result[0] = 1.0F;
00140          result[1] = in[0];
00141          result[2] = powf(in[1], exponent);
00142          result[3] = 1.0;
00143       }
00144    }
00145    else 
00146    {
00147       result[0] = 1.0F;
00148       result[1] = 0.0;
00149       result[2] = 0.0;
00150       result[3] = 1.0F;
00151    }
00152 }
00153 
00154 
00155 static void do_populate_lut( struct shine_tab *tab,
00156                              float unclamped_exponent )
00157 {
00158    const float epsilon = 1.0F / 256.0F;    
00159    float exponent = CLAMP(unclamped_exponent, -(128.0F - epsilon), (128.0F - epsilon));
00160    unsigned i;
00161 
00162    tab->exponent = unclamped_exponent; /* for later comparison */
00163    
00164    tab->values[0] = 0;
00165    if (exponent == 0) {
00166       for (i = 1; i < 258; i++) {
00167          tab->values[i] = 1.0;
00168       }      
00169    }
00170    else {
00171       for (i = 1; i < 258; i++) {
00172          tab->values[i] = powf((float)i * epsilon, exponent);
00173       }
00174    }
00175 }
00176 
00177 
00178 
00179 
00180 static void PIPE_CDECL populate_lut( struct aos_machine *machine,
00181                                      float *result,
00182                                      const float *in,
00183                                      unsigned count )
00184 {
00185    unsigned i, tab;
00186 
00187    /* Search for an existing table for this value.  Note that without
00188     * static analysis we don't really know if in[3] will be constant,
00189     * but it usually is...
00190     */
00191    for (tab = 0; tab < 4; tab++) {
00192       if (machine->shine_tab[tab].exponent == in[3]) {
00193          goto found;
00194       }
00195    }
00196 
00197    for (tab = 0, i = 1; i < 4; i++) {
00198       if (machine->shine_tab[i].last_used < machine->shine_tab[tab].last_used)
00199          tab = i;
00200    }
00201 
00202    if (machine->shine_tab[tab].last_used == machine->now) {
00203       /* No unused tables (this is not a ffvertex program...).  Just
00204        * call pow each time:
00205        */
00206       machine->lit_info[count].func = aos_do_lit;
00207       machine->lit_info[count].func( machine, result, in, count );
00208       return;
00209    }
00210    else {
00211       do_populate_lut( &machine->shine_tab[tab], in[3] );
00212    }
00213 
00214  found:
00215    machine->shine_tab[tab].last_used = machine->now;
00216    machine->lit_info[count].shine_tab = &machine->shine_tab[tab];
00217    machine->lit_info[count].func = do_lit_lut;
00218    machine->lit_info[count].func( machine, result, in, count );
00219 }
00220 
00221 
00222 void draw_vs_aos_machine_constants( struct aos_machine *machine,
00223                                     const float (*constants)[4] )
00224 {
00225    machine->constants = constants;
00226 
00227    {
00228       unsigned i;
00229       for (i = 0; i < MAX_LIT_INFO; i++) {
00230          machine->lit_info[i].func = populate_lut;
00231          machine->now++;
00232       }
00233    }
00234 }
00235 
00236 
00237 void draw_vs_aos_machine_viewport( struct aos_machine *machine,
00238                                    const struct pipe_viewport_state *viewport )
00239 {
00240    memcpy(machine->scale, viewport->scale, 4 * sizeof(float));
00241    memcpy(machine->translate, viewport->translate, 4 * sizeof(float));
00242 }
00243 
00244 
00245 
/**
 * Release a machine obtained from draw_vs_aos_machine().
 *
 * draw_vs_aos_machine() returns NULL when allocation fails, so a NULL
 * argument is accepted and ignored here.  Whether align_free() itself
 * tolerates NULL is not visible from this file, hence the explicit guard.
 *
 * \param machine  machine to free, or NULL
 */
void draw_vs_aos_machine_destroy( struct aos_machine *machine )
{
   if (!machine)
      return;

   align_free(machine);
}
00250 
00251 struct aos_machine *draw_vs_aos_machine( void )
00252 {
00253    struct aos_machine *machine;
00254    unsigned i;
00255    float inv = 1.0f/255.0f;
00256    float f255 = 255.0f;
00257 
00258    machine = align_malloc(sizeof(struct aos_machine), 16);
00259    if (!machine)
00260       return NULL;
00261 
00262    memset(machine, 0, sizeof(*machine));
00263 
00264    ASSIGN_4V(machine->internal[IMM_SWZ],       1.0f,  -1.0f,  0.0f, 1.0f);
00265    *(unsigned *)&machine->internal[IMM_SWZ][3] = 0xffffffff;
00266 
00267    ASSIGN_4V(machine->internal[IMM_ONES],      1.0f,  1.0f,  1.0f,  1.0f);
00268    ASSIGN_4V(machine->internal[IMM_NEGS],     -1.0f, -1.0f, -1.0f, -1.0f);
00269    ASSIGN_4V(machine->internal[IMM_IDENTITY],  0.0f,  0.0f,  0.0f,  1.0f);
00270    ASSIGN_4V(machine->internal[IMM_INV_255],   inv,   inv,   inv,   inv);
00271    ASSIGN_4V(machine->internal[IMM_255],       f255,  f255,  f255,  f255);
00272    ASSIGN_4V(machine->internal[IMM_RSQ],       -.5f,  1.5f,  0.0f,  0.0f);
00273 
00274 
00275    machine->fpu_rnd_nearest = (X87_CW_EXCEPTION_INV_OP |
00276                                X87_CW_EXCEPTION_DENORM_OP |
00277                                X87_CW_EXCEPTION_ZERO_DIVIDE |
00278                                X87_CW_EXCEPTION_OVERFLOW |
00279                                X87_CW_EXCEPTION_UNDERFLOW |
00280                                X87_CW_EXCEPTION_PRECISION |
00281                                (1<<6) |
00282                                X87_CW_ROUND_NEAREST |
00283                                X87_CW_PRECISION_DOUBLE_EXT);
00284 
00285    assert(machine->fpu_rnd_nearest == 0x37f);
00286                                
00287    machine->fpu_rnd_neg_inf = (X87_CW_EXCEPTION_INV_OP |
00288                                X87_CW_EXCEPTION_DENORM_OP |
00289                                X87_CW_EXCEPTION_ZERO_DIVIDE |
00290                                X87_CW_EXCEPTION_OVERFLOW |
00291                                X87_CW_EXCEPTION_UNDERFLOW |
00292                                X87_CW_EXCEPTION_PRECISION |
00293                                (1<<6) |
00294                                X87_CW_ROUND_DOWN |
00295                                X87_CW_PRECISION_DOUBLE_EXT);
00296 
00297    for (i = 0; i < MAX_SHINE_TAB; i++)
00298       do_populate_lut( &machine->shine_tab[i], 1.0f );
00299 
00300    return machine;
00301 }
00302 
00303 #else
00304 
/**
 * Stub used when PIPE_ARCH_X86 is not defined: does nothing.
 *
 * \param machine   ignored
 * \param viewport  ignored
 */
void draw_vs_aos_machine_viewport( struct aos_machine *machine,
                                   const struct pipe_viewport_state *viewport )
{
   (void)machine;
   (void)viewport;
}
00309 
/**
 * Stub used when PIPE_ARCH_X86 is not defined: does nothing.
 *
 * \param machine    ignored
 * \param constants  ignored
 */
void draw_vs_aos_machine_constants( struct aos_machine *machine,
                                    const float (*constants)[4] )
{
   (void)machine;
   (void)constants;
}
00314 
/**
 * Stub used when PIPE_ARCH_X86 is not defined: does nothing.
 *
 * \param machine  ignored
 */
void draw_vs_aos_machine_destroy( struct aos_machine *machine )
{
   (void)machine;
}
00318 
/**
 * Stub used when PIPE_ARCH_X86 is not defined.
 *
 * \return always NULL; no machine is created in this configuration.
 */
struct aos_machine *draw_vs_aos_machine( void )
{
   struct aos_machine *no_machine = NULL;

   return no_machine;
}
00323 #endif
00324
draw_vs_aos_machine.c