00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034 #include "util/u_math.h"
00035 #include "util/u_memory.h"
00036 #include "pipe/p_config.h"
00037
00038 #include "draw_vs.h"
00039
00040 #if defined(PIPE_ARCH_X86)
00041
00042 #include "pipe/p_shader_tokens.h"
00043
00044 #include "draw_private.h"
00045 #include "draw_context.h"
00046
00047 #include "rtasm/rtasm_cpu.h"
00048 #include "rtasm/rtasm_x86sse.h"
00049 #include "tgsi/tgsi_sse2.h"
00050 #include "tgsi/tgsi_parse.h"
00051
/* Vertex count constant for the SSE path.
 * NOTE(review): not referenced in the visible part of this file --
 * vs_sse_run_linear() batches by MAX_TGSI_VERTICES instead; confirm
 * whether this is still needed.
 */
#define SSE_MAX_VERTICES 4
00053
00054 typedef void (PIPE_CDECL *codegen_function) (
00055 const struct tgsi_exec_vector *input,
00056 struct tgsi_exec_vector *output,
00057 float (*constant)[4],
00058 struct tgsi_exec_vector *temporary,
00059 float (*immediates)[4],
00060 const float (*aos_input)[4],
00061 uint num_inputs,
00062 uint input_stride,
00063 float (*aos_output)[4],
00064 uint num_outputs,
00065 uint output_stride );
00066
00067 struct draw_sse_vertex_shader {
00068 struct draw_vertex_shader base;
00069 struct x86_function sse2_program;
00070
00071 codegen_function func;
00072
00073 struct tgsi_exec_machine *machine;
00074 };
00075
00076
/**
 * 'prepare' callback for the SSE vertex shader.
 *
 * Intentionally does nothing.
 */
static void
vs_sse_prepare( struct draw_vertex_shader *base,
                struct draw_context *draw )
{
   (void) base;
   (void) draw;
}
00082
00083
00084
00085
00086
00087
00088
00089 static void
00090 vs_sse_run_linear( struct draw_vertex_shader *base,
00091 const float (*input)[4],
00092 float (*output)[4],
00093 const float (*constants)[4],
00094 unsigned count,
00095 unsigned input_stride,
00096 unsigned output_stride )
00097 {
00098 struct draw_sse_vertex_shader *shader = (struct draw_sse_vertex_shader *)base;
00099 struct tgsi_exec_machine *machine = shader->machine;
00100 unsigned int i;
00101
00102
00103
00104
00105 tgsi_set_exec_mask(machine, 1, 1, 1, 1);
00106
00107 for (i = 0; i < count; i += MAX_TGSI_VERTICES) {
00108 unsigned int max_vertices = MIN2(MAX_TGSI_VERTICES, count - i);
00109
00110 if (max_vertices < 4) {
00111
00112 tgsi_set_exec_mask(machine,
00113 1,
00114 max_vertices > 1,
00115 max_vertices > 2,
00116 0);
00117 }
00118
00119
00120
00121 shader->func(machine->Inputs,
00122 machine->Outputs,
00123 (float (*)[4])constants,
00124 machine->Temps,
00125 (float (*)[4])shader->base.immediates,
00126 input,
00127 base->info.num_inputs,
00128 input_stride,
00129 output,
00130 base->info.num_outputs,
00131 output_stride );
00132
00133 input = (const float (*)[4])((const char *)input + input_stride * max_vertices);
00134 output = (float (*)[4])((char *)output + output_stride * max_vertices);
00135 }
00136 }
00137
00138
00139
00140
00141 static void
00142 vs_sse_delete( struct draw_vertex_shader *base )
00143 {
00144 struct draw_sse_vertex_shader *shader = (struct draw_sse_vertex_shader *)base;
00145
00146 x86_release_func( &shader->sse2_program );
00147
00148 align_free( (void *) shader->base.immediates );
00149
00150 FREE( (void*) shader->base.state.tokens );
00151 FREE( shader );
00152 }
00153
00154
00155 struct draw_vertex_shader *
00156 draw_create_vs_sse(struct draw_context *draw,
00157 const struct pipe_shader_state *templ)
00158 {
00159 struct draw_sse_vertex_shader *vs;
00160
00161 if (!rtasm_cpu_has_sse2())
00162 return NULL;
00163
00164 vs = CALLOC_STRUCT( draw_sse_vertex_shader );
00165 if (vs == NULL)
00166 return NULL;
00167
00168
00169 vs->base.state.tokens = tgsi_dup_tokens(templ->tokens);
00170 if (!vs->base.state.tokens)
00171 goto fail;
00172
00173 tgsi_scan_shader(templ->tokens, &vs->base.info);
00174
00175 vs->base.draw = draw;
00176 if (1)
00177 vs->base.create_varient = draw_vs_varient_aos_sse;
00178 else
00179 vs->base.create_varient = draw_vs_varient_generic;
00180 vs->base.prepare = vs_sse_prepare;
00181 vs->base.run_linear = vs_sse_run_linear;
00182 vs->base.delete = vs_sse_delete;
00183
00184 vs->base.immediates = align_malloc(TGSI_EXEC_NUM_IMMEDIATES * 4 *
00185 sizeof(float), 16);
00186
00187 vs->machine = &draw->vs.machine;
00188
00189 x86_init_func( &vs->sse2_program );
00190
00191 if (!tgsi_emit_sse2( (struct tgsi_token *) vs->base.state.tokens,
00192 &vs->sse2_program,
00193 (float (*)[4])vs->base.immediates,
00194 TRUE ))
00195 goto fail;
00196
00197 vs->func = (codegen_function) x86_get_func( &vs->sse2_program );
00198 if (!vs->func) {
00199 goto fail;
00200 }
00201
00202 return &vs->base;
00203
00204 fail:
00205 debug_error("tgsi_emit_sse2() failed, falling back to interpreter\n");
00206
00207 x86_release_func( &vs->sse2_program );
00208
00209 FREE(vs);
00210 return NULL;
00211 }
00212
00213
00214
00215 #else
00216
/**
 * Stub used when not building for PIPE_ARCH_X86: no SSE vertex shader
 * can be created, so this always returns NULL.
 */
struct draw_vertex_shader *
draw_create_vs_sse( struct draw_context *draw,
                    const struct pipe_shader_state *templ )
{
   (void) draw;
   (void) templ;

   return NULL;
}
00223
00224
00225 #endif
00226