00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035 #include <spu_mfcio.h>
00036
00037 #include "pipe/p_state.h"
00038 #include "pipe/p_shader_tokens.h"
00039 #include "util/u_math.h"
00040 #include "draw/draw_private.h"
00041 #include "draw/draw_context.h"
00042 #include "cell/common.h"
00043 #include "spu_vertex_shader.h"
00044 #include "spu_exec.h"
00045 #include "spu_main.h"
00046
00047
/**
 * Size in bytes of the attribute payload of the largest possible vertex:
 * one float[4] per shader output slot plus two extra float[4] slots.
 */
#define MAX_VERTEX_SIZE (sizeof(float[4]) * (PIPE_MAX_SHADER_OUTPUTS + 2))


/**
 * Clip-mask flags for the six planes of the view volume.  Bits 6 and up
 * are used for additional (user) clip planes, see compute_clipmask().
 */
#define CLIP_RIGHT_BIT   (1 << 0)
#define CLIP_LEFT_BIT    (1 << 1)
#define CLIP_TOP_BIT     (1 << 2)
#define CLIP_BOTTOM_BIT  (1 << 3)
#define CLIP_FAR_BIT     (1 << 4)
#define CLIP_NEAR_BIT    (1 << 5)
00057
00058
00059 static INLINE float
00060 dot4(const float *a, const float *b)
00061 {
00062 return (a[0]*b[0] +
00063 a[1]*b[1] +
00064 a[2]*b[2] +
00065 a[3]*b[3]);
00066 }
00067
00068 static INLINE unsigned
00069 compute_clipmask(const float *clip, float plane[][4], unsigned nr)
00070 {
00071 unsigned mask = 0;
00072 unsigned i;
00073
00074
00075
00076 if (-clip[0] + clip[3] < 0) mask |= CLIP_RIGHT_BIT;
00077 if ( clip[0] + clip[3] < 0) mask |= CLIP_LEFT_BIT;
00078 if (-clip[1] + clip[3] < 0) mask |= CLIP_TOP_BIT;
00079 if ( clip[1] + clip[3] < 0) mask |= CLIP_BOTTOM_BIT;
00080 if (-clip[2] + clip[3] < 0) mask |= CLIP_FAR_BIT;
00081 if ( clip[2] + clip[3] < 0) mask |= CLIP_NEAR_BIT;
00082
00083
00084
00085 for (i = 6; i < nr; i++) {
00086 if (dot4(clip, plane[i]) < 0)
00087 mask |= (1<<i);
00088 }
00089
00090 return mask;
00091 }
00092
00093
00102 static void
00103 run_vertex_program(struct spu_vs_context *draw,
00104 unsigned elts[4], unsigned count,
00105 const uint64_t *vOut)
00106 {
00107 struct spu_exec_machine *machine = &draw->machine;
00108 unsigned int j;
00109
00110 ALIGN16_DECL(struct spu_exec_vector, inputs, PIPE_MAX_ATTRIBS);
00111 ALIGN16_DECL(struct spu_exec_vector, outputs, PIPE_MAX_ATTRIBS);
00112 const float *scale = draw->viewport.scale;
00113 const float *trans = draw->viewport.translate;
00114
00115 ASSERT(count <= 4);
00116
00117 machine->Processor = TGSI_PROCESSOR_VERTEX;
00118
00119 ASSERT_ALIGN16(draw->constants);
00120 machine->Consts = (float (*)[4]) draw->constants;
00121
00122 machine->Inputs = ALIGN16_ASSIGN(inputs);
00123 machine->Outputs = ALIGN16_ASSIGN(outputs);
00124
00125 spu_vertex_fetch( draw, machine, elts, count );
00126
00127
00128 spu_exec_machine_run( machine );
00129
00130
00131
00132 for (j = 0; j < count; j++) {
00133 unsigned slot;
00134 float x, y, z, w;
00135 unsigned char buffer[sizeof(struct vertex_header)
00136 + MAX_VERTEX_SIZE] ALIGN16_ATTRIB;
00137 struct vertex_header *const tmpOut =
00138 (struct vertex_header *) buffer;
00139 const unsigned vert_size = ROUNDUP16(sizeof(struct vertex_header)
00140 + (sizeof(float) * 4
00141 * draw->num_vs_outputs));
00142
00143 mfc_get(tmpOut, vOut[j], vert_size, TAG_VERTEX_BUFFER, 0, 0);
00144 wait_on_mask(1 << TAG_VERTEX_BUFFER);
00145
00146
00147
00148
00149
00150
00151
00152
00153 x = tmpOut->clip[0] = machine->Outputs[0].xyzw[0].f[j];
00154 y = tmpOut->clip[1] = machine->Outputs[0].xyzw[1].f[j];
00155 z = tmpOut->clip[2] = machine->Outputs[0].xyzw[2].f[j];
00156 w = tmpOut->clip[3] = machine->Outputs[0].xyzw[3].f[j];
00157
00158 tmpOut->clipmask = compute_clipmask(tmpOut->clip, draw->plane,
00159 draw->nr_planes);
00160 tmpOut->edgeflag = 1;
00161
00162
00163 w = 1.0f / w;
00164 x *= w;
00165 y *= w;
00166 z *= w;
00167
00168
00169 tmpOut->data[0][0] = x * scale[0] + trans[0];
00170 tmpOut->data[0][1] = y * scale[1] + trans[1];
00171 tmpOut->data[0][2] = z * scale[2] + trans[2];
00172 tmpOut->data[0][3] = w;
00173
00174
00175
00176
00177 for (slot = 1; slot < draw->num_vs_outputs; slot++) {
00178 tmpOut->data[slot][0] = machine->Outputs[slot].xyzw[0].f[j];
00179 tmpOut->data[slot][1] = machine->Outputs[slot].xyzw[1].f[j];
00180 tmpOut->data[slot][2] = machine->Outputs[slot].xyzw[2].f[j];
00181 tmpOut->data[slot][3] = machine->Outputs[slot].xyzw[3].f[j];
00182 }
00183
00184 mfc_put(tmpOut, vOut[j], vert_size, TAG_VERTEX_BUFFER, 0, 0);
00185 }
00186 }
00187
00188
00189 unsigned char immediates[(sizeof(float) * 4 * TGSI_EXEC_NUM_IMMEDIATES) + 32]
00190 ALIGN16_ATTRIB;
00191
00192
00193 void
00194 spu_bind_vertex_shader(struct spu_vs_context *draw,
00195 struct cell_shader_info *vs)
00196 {
00197 const unsigned immediate_addr = vs->immediates;
00198 const unsigned immediate_size =
00199 ROUNDUP16((sizeof(float) * 4 * vs->num_immediates)
00200 + (immediate_addr & 0x0f));
00201
00202
00203 mfc_get(immediates, immediate_addr & ~0x0f, immediate_size,
00204 TAG_VERTEX_BUFFER, 0, 0);
00205
00206 draw->machine.Instructions = (struct tgsi_full_instruction *)
00207 vs->instructions;
00208 draw->machine.NumInstructions = vs->num_instructions;
00209
00210 draw->machine.Declarations = (struct tgsi_full_declaration *)
00211 vs->declarations;
00212 draw->machine.NumDeclarations = vs->num_declarations;
00213
00214 draw->num_vs_outputs = vs->num_outputs;
00215
00216
00217 spu_exec_machine_init(&draw->machine,
00218 PIPE_MAX_SAMPLERS,
00219 NULL ,
00220 PIPE_SHADER_VERTEX);
00221
00222 wait_on_mask(1 << TAG_VERTEX_BUFFER);
00223
00224 (void) memcpy(& draw->machine.Imms, &immediates[immediate_addr & 0x0f],
00225 sizeof(float) * 4 * vs->num_immediates);
00226 }
00227
00228
00229 void
00230 spu_execute_vertex_shader(struct spu_vs_context *draw,
00231 const struct cell_command_vs *vs)
00232 {
00233 unsigned i;
00234
00235 (void) memcpy(draw->plane, vs->plane, sizeof(float) * 4 * vs->nr_planes);
00236 draw->nr_planes = vs->nr_planes;
00237 draw->vertex_fetch.nr_attrs = vs->nr_attrs;
00238
00239 for (i = 0; i < vs->num_elts; i += 4) {
00240 const unsigned batch_size = MIN2(vs->num_elts - i, 4);
00241
00242 run_vertex_program(draw, & vs->elts[i], batch_size, &vs->vOut[i]);
00243 }
00244 }