draw_vs_sse.c

Go to the documentation of this file.
00001 /**************************************************************************
00002  * 
00003  * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
00004  * All Rights Reserved.
00005  * 
00006  * Permission is hereby granted, free of charge, to any person obtaining a
00007  * copy of this software and associated documentation files (the
00008  * "Software"), to deal in the Software without restriction, including
00009  * without limitation the rights to use, copy, modify, merge, publish,
00010  * distribute, sub license, and/or sell copies of the Software, and to
00011  * permit persons to whom the Software is furnished to do so, subject to
00012  * the following conditions:
00013  * 
00014  * The above copyright notice and this permission notice (including the
00015  * next paragraph) shall be included in all copies or substantial portions
00016  * of the Software.
00017  * 
00018  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
00019  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
00020  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
00021  * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
00022  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
00023  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
00024  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
00025  * 
00026  **************************************************************************/
00027 
00028  /*
00029   * Authors:
00030   *   Keith Whitwell <keith@tungstengraphics.com>
00031   *   Brian Paul
00032   */
00033 
00034 #include "util/u_math.h"
00035 #include "util/u_memory.h"
00036 #include "pipe/p_config.h"
00037 
00038 #include "draw_vs.h"
00039 
00040 #if defined(PIPE_ARCH_X86)
00041 
00042 #include "pipe/p_shader_tokens.h"
00043 
00044 #include "draw_private.h"
00045 #include "draw_context.h"
00046 
00047 #include "rtasm/rtasm_cpu.h"
00048 #include "rtasm/rtasm_x86sse.h"
00049 #include "tgsi/tgsi_sse2.h"
00050 #include "tgsi/tgsi_parse.h"
00051 
00052 #define SSE_MAX_VERTICES 4
00053 
00054 typedef void (PIPE_CDECL *codegen_function) (
00055    const struct tgsi_exec_vector *input, /* 1 */
00056    struct tgsi_exec_vector *output, /* 2 */
00057    float (*constant)[4],        /* 3 */
00058    struct tgsi_exec_vector *temporary, /* 4 */
00059    float (*immediates)[4],      /* 5 */
00060    const float (*aos_input)[4], /* 6 */
00061    uint num_inputs,             /* 7 */
00062    uint input_stride,           /* 8 */
00063    float (*aos_output)[4],      /* 9 */
00064    uint num_outputs,            /* 10 */
00065    uint output_stride );        /* 11 */
00066 
00067 struct draw_sse_vertex_shader {
00068    struct draw_vertex_shader base;
00069    struct x86_function sse2_program;
00070 
00071    codegen_function func;
00072    
00073    struct tgsi_exec_machine *machine;
00074 };
00075 
00076 
/*
 * 'prepare' callback of the SSE2 vertex shader.  There is nothing to set
 * up here, so both arguments are ignored.
 */
static void
vs_sse_prepare( struct draw_vertex_shader *base,
                struct draw_context *draw )
{
   (void) base;
   (void) draw;
}
00082 
00083 
00084 
00085 /* Simplified vertex shader interface for the pt paths.  Given the
00086  * complexity of code-generating all the above operations together,
00087  * it's time to try doing all the other stuff separately.
00088  */
00089 static void
00090 vs_sse_run_linear( struct draw_vertex_shader *base,
00091                    const float (*input)[4],
00092                    float (*output)[4],
00093                    const float (*constants)[4],
00094                    unsigned count,
00095                    unsigned input_stride,
00096                    unsigned output_stride )
00097 {
00098    struct draw_sse_vertex_shader *shader = (struct draw_sse_vertex_shader *)base;
00099    struct tgsi_exec_machine *machine = shader->machine;
00100    unsigned int i;
00101 
00102    /* By default, execute all channels.  XXX move this inside the loop
00103     * below when we support shader conditionals/loops.
00104     */
00105    tgsi_set_exec_mask(machine, 1, 1, 1, 1);
00106 
00107    for (i = 0; i < count; i += MAX_TGSI_VERTICES) {
00108       unsigned int max_vertices = MIN2(MAX_TGSI_VERTICES, count - i);
00109 
00110       if (max_vertices < 4) {
00111          /* disable the unused execution channels */
00112          tgsi_set_exec_mask(machine,
00113                             1,
00114                             max_vertices > 1,
00115                             max_vertices > 2,
00116                             0);
00117       }
00118 
00119       /* run compiled shader
00120        */
00121       shader->func(machine->Inputs,
00122                    machine->Outputs,
00123                    (float (*)[4])constants,
00124                    machine->Temps,
00125                    (float (*)[4])shader->base.immediates,
00126                    input,
00127                    base->info.num_inputs,
00128                    input_stride,
00129                    output,
00130                    base->info.num_outputs,
00131                    output_stride );
00132 
00133       input = (const float (*)[4])((const char *)input + input_stride * max_vertices);
00134       output = (float (*)[4])((char *)output + output_stride * max_vertices);
00135    }
00136 }
00137 
00138 
00139 
00140 
00141 static void
00142 vs_sse_delete( struct draw_vertex_shader *base )
00143 {
00144    struct draw_sse_vertex_shader *shader = (struct draw_sse_vertex_shader *)base;
00145    
00146    x86_release_func( &shader->sse2_program );
00147 
00148    align_free( (void *) shader->base.immediates );
00149 
00150    FREE( (void*) shader->base.state.tokens );
00151    FREE( shader );
00152 }
00153 
00154 
00155 struct draw_vertex_shader *
00156 draw_create_vs_sse(struct draw_context *draw,
00157                           const struct pipe_shader_state *templ)
00158 {
00159    struct draw_sse_vertex_shader *vs;
00160 
00161    if (!rtasm_cpu_has_sse2())
00162       return NULL;
00163 
00164    vs = CALLOC_STRUCT( draw_sse_vertex_shader );
00165    if (vs == NULL) 
00166       return NULL;
00167 
00168    /* we make a private copy of the tokens */
00169    vs->base.state.tokens = tgsi_dup_tokens(templ->tokens);
00170    if (!vs->base.state.tokens)
00171       goto fail;
00172 
00173    tgsi_scan_shader(templ->tokens, &vs->base.info);
00174 
00175    vs->base.draw = draw;
00176    if (1)
00177       vs->base.create_varient = draw_vs_varient_aos_sse;
00178    else
00179       vs->base.create_varient = draw_vs_varient_generic;
00180    vs->base.prepare = vs_sse_prepare;
00181    vs->base.run_linear = vs_sse_run_linear;
00182    vs->base.delete = vs_sse_delete;
00183    
00184    vs->base.immediates = align_malloc(TGSI_EXEC_NUM_IMMEDIATES * 4 *
00185                                       sizeof(float), 16);
00186 
00187    vs->machine = &draw->vs.machine;
00188    
00189    x86_init_func( &vs->sse2_program );
00190 
00191    if (!tgsi_emit_sse2( (struct tgsi_token *) vs->base.state.tokens,
00192                         &vs->sse2_program, 
00193                         (float (*)[4])vs->base.immediates, 
00194                         TRUE )) 
00195       goto fail;
00196       
00197    vs->func = (codegen_function) x86_get_func( &vs->sse2_program );
00198    if (!vs->func) {
00199       goto fail;
00200    }
00201    
00202    return &vs->base;
00203 
00204 fail:
00205    debug_error("tgsi_emit_sse2() failed, falling back to interpreter\n");
00206 
00207    x86_release_func( &vs->sse2_program );
00208    
00209    FREE(vs);
00210    return NULL;
00211 }
00212 
00213 
00214 
00215 #else
00216 
/*
 * Variant compiled when PIPE_ARCH_X86 is not defined: no SSE2 shader can
 * be built, so a null pointer is returned unconditionally.
 */
struct draw_vertex_shader *
draw_create_vs_sse( struct draw_context *draw,
                    const struct pipe_shader_state *templ )
{
   (void) draw;
   (void) templ;

   return (struct draw_vertex_shader *) 0;
}
00223 
00224 
00225 #endif
00226 

Generated on Tue Sep 29 06:25:14 2009 for Gallium3D by  doxygen 1.5.4