Gallium3D: gallium/drivers/cell/spu/spu_vertex

00001 /**************************************************************************
00002  * 
00003  * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
00004  * All Rights Reserved.
00005  * 
00006  * Permission is hereby granted, free of charge, to any person obtaining a
00007  * copy of this software and associated documentation files (the
00008  * "Software"), to deal in the Software without restriction, including
00009  * without limitation the rights to use, copy, modify, merge, publish,
00010  * distribute, sub license, and/or sell copies of the Software, and to
00011  * permit persons to whom the Software is furnished to do so, subject to
00012  * the following conditions:
00013  * 
00014  * The above copyright notice and this permission notice (including the
00015  * next paragraph) shall be included in all copies or substantial portions
00016  * of the Software.
00017  * 
00018  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
00019  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
00020  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
00021  * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
00022  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
00023  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
00024  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
00025  * 
00026  **************************************************************************/
00027 
00028  /*
00029   * Authors:
00030   *   Keith Whitwell <keith@tungstengraphics.com>
00031   *   Brian Paul
00032   *   Ian Romanick <idr@us.ibm.com>
00033   */
00034 
00035 #include <spu_mfcio.h>
00036 
00037 #include "pipe/p_state.h"
00038 #include "pipe/p_shader_tokens.h"
00039 #include "util/u_math.h"
00040 #include "draw/draw_private.h"
00041 #include "draw/draw_context.h"
00042 #include "cell/common.h"
00043 #include "spu_vertex_shader.h"
00044 #include "spu_exec.h"
00045 #include "spu_main.h"
00046 
00047 
/* Upper bound, in bytes, on the attribute payload of one vertex:
 * (2 + PIPE_MAX_SHADER_OUTPUTS) vectors of four floats each.
 */
#define MAX_VERTEX_SIZE (sizeof(float) * 4 * (2 + PIPE_MAX_SHADER_OUTPUTS))


/* Clipmask bits for the six hardwired view-volume planes (bits 0..5). */
#define CLIP_RIGHT_BIT  (1 << 0)
#define CLIP_LEFT_BIT   (1 << 1)
#define CLIP_TOP_BIT    (1 << 2)
#define CLIP_BOTTOM_BIT (1 << 3)
#define CLIP_FAR_BIT    (1 << 4)
#define CLIP_NEAR_BIT   (1 << 5)
00057 
00058 
00059 static INLINE float
00060 dot4(const float *a, const float *b)
00061 {
00062    return (a[0]*b[0] +
00063            a[1]*b[1] +
00064            a[2]*b[2] +
00065            a[3]*b[3]);
00066 }
00067 
00068 static INLINE unsigned
00069 compute_clipmask(const float *clip, /*const*/ float plane[][4], unsigned nr)
00070 {
00071    unsigned mask = 0;
00072    unsigned i;
00073 
00074    /* Do the hardwired planes first:
00075     */
00076    if (-clip[0] + clip[3] < 0) mask |= CLIP_RIGHT_BIT;
00077    if ( clip[0] + clip[3] < 0) mask |= CLIP_LEFT_BIT;
00078    if (-clip[1] + clip[3] < 0) mask |= CLIP_TOP_BIT;
00079    if ( clip[1] + clip[3] < 0) mask |= CLIP_BOTTOM_BIT;
00080    if (-clip[2] + clip[3] < 0) mask |= CLIP_FAR_BIT;
00081    if ( clip[2] + clip[3] < 0) mask |= CLIP_NEAR_BIT;
00082 
00083    /* Followed by any remaining ones:
00084     */
00085    for (i = 6; i < nr; i++) {
00086       if (dot4(clip, plane[i]) < 0) 
00087          mask |= (1<<i);
00088    }
00089 
00090    return mask;
00091 }
00092 
00093 
00102 static void
00103 run_vertex_program(struct spu_vs_context *draw,
00104                    unsigned elts[4], unsigned count,
00105                    const uint64_t *vOut)
00106 {
00107    struct spu_exec_machine *machine = &draw->machine;
00108    unsigned int j;
00109 
00110    ALIGN16_DECL(struct spu_exec_vector, inputs, PIPE_MAX_ATTRIBS);
00111    ALIGN16_DECL(struct spu_exec_vector, outputs, PIPE_MAX_ATTRIBS);
00112    const float *scale = draw->viewport.scale;
00113    const float *trans = draw->viewport.translate;
00114 
00115    ASSERT(count <= 4);
00116 
00117    machine->Processor = TGSI_PROCESSOR_VERTEX;
00118 
00119    ASSERT_ALIGN16(draw->constants);
00120    machine->Consts = (float (*)[4]) draw->constants;
00121 
00122    machine->Inputs = ALIGN16_ASSIGN(inputs);
00123    machine->Outputs = ALIGN16_ASSIGN(outputs);
00124 
00125    spu_vertex_fetch( draw, machine, elts, count );
00126 
00127    /* run shader */
00128    spu_exec_machine_run( machine );
00129 
00130 
00131    /* store machine results */
00132    for (j = 0; j < count; j++) {
00133       unsigned slot;
00134       float x, y, z, w;
00135       unsigned char buffer[sizeof(struct vertex_header)
00136           + MAX_VERTEX_SIZE] ALIGN16_ATTRIB;
00137       struct vertex_header *const tmpOut =
00138           (struct vertex_header *) buffer;
00139       const unsigned vert_size = ROUNDUP16(sizeof(struct vertex_header)
00140                                            + (sizeof(float) * 4 
00141                                               * draw->num_vs_outputs));
00142 
00143       mfc_get(tmpOut, vOut[j], vert_size, TAG_VERTEX_BUFFER, 0, 0);
00144       wait_on_mask(1 << TAG_VERTEX_BUFFER);
00145 
00146 
00147       /* Handle attr[0] (position) specially:
00148        *
00149        * XXX: Computing the clipmask should be done in the vertex
00150        * program as a set of DP4 instructions appended to the
00151        * user-provided code.
00152        */
00153       x = tmpOut->clip[0] = machine->Outputs[0].xyzw[0].f[j];
00154       y = tmpOut->clip[1] = machine->Outputs[0].xyzw[1].f[j];
00155       z = tmpOut->clip[2] = machine->Outputs[0].xyzw[2].f[j];
00156       w = tmpOut->clip[3] = machine->Outputs[0].xyzw[3].f[j];
00157 
00158       tmpOut->clipmask = compute_clipmask(tmpOut->clip, draw->plane,
00159                                            draw->nr_planes);
00160       tmpOut->edgeflag = 1;
00161 
00162       /* divide by w */
00163       w = 1.0f / w;
00164       x *= w;
00165       y *= w;
00166       z *= w;
00167 
00168       /* Viewport mapping */
00169       tmpOut->data[0][0] = x * scale[0] + trans[0];
00170       tmpOut->data[0][1] = y * scale[1] + trans[1];
00171       tmpOut->data[0][2] = z * scale[2] + trans[2];
00172       tmpOut->data[0][3] = w;
00173 
00174       /* Remaining attributes are packed into sequential post-transform
00175        * vertex attrib slots.
00176        */
00177       for (slot = 1; slot < draw->num_vs_outputs; slot++) {
00178          tmpOut->data[slot][0] = machine->Outputs[slot].xyzw[0].f[j];
00179          tmpOut->data[slot][1] = machine->Outputs[slot].xyzw[1].f[j];
00180          tmpOut->data[slot][2] = machine->Outputs[slot].xyzw[2].f[j];
00181          tmpOut->data[slot][3] = machine->Outputs[slot].xyzw[3].f[j];
00182       }
00183 
00184       mfc_put(tmpOut, vOut[j], vert_size, TAG_VERTEX_BUFFER, 0, 0);
00185    } /* loop over vertices */
00186 }
00187 
00188 
00189 unsigned char immediates[(sizeof(float) * 4 * TGSI_EXEC_NUM_IMMEDIATES) + 32]
00190     ALIGN16_ATTRIB;
00191 
00192 
00193 void
00194 spu_bind_vertex_shader(struct spu_vs_context *draw,
00195                        struct cell_shader_info *vs)
00196 {
00197    const unsigned immediate_addr = vs->immediates;
00198    const unsigned immediate_size = 
00199        ROUNDUP16((sizeof(float) * 4 * vs->num_immediates)
00200                  + (immediate_addr & 0x0f));
00201  
00202 
00203    mfc_get(immediates, immediate_addr & ~0x0f, immediate_size,
00204            TAG_VERTEX_BUFFER, 0, 0);
00205 
00206    draw->machine.Instructions = (struct tgsi_full_instruction *)
00207        vs->instructions;
00208    draw->machine.NumInstructions = vs->num_instructions;
00209 
00210    draw->machine.Declarations = (struct tgsi_full_declaration *)
00211        vs->declarations;
00212    draw->machine.NumDeclarations = vs->num_declarations;
00213 
00214    draw->num_vs_outputs = vs->num_outputs;
00215 
00216    /* specify the shader to interpret/execute */
00217    spu_exec_machine_init(&draw->machine,
00218                          PIPE_MAX_SAMPLERS,
00219                          NULL /*samplers*/,
00220                          PIPE_SHADER_VERTEX);
00221 
00222    wait_on_mask(1 << TAG_VERTEX_BUFFER);
00223 
00224    (void) memcpy(& draw->machine.Imms, &immediates[immediate_addr & 0x0f],
00225                  sizeof(float) * 4 * vs->num_immediates);
00226 }
00227 
00228 
00229 void
00230 spu_execute_vertex_shader(struct spu_vs_context *draw,
00231                           const struct cell_command_vs *vs)
00232 {
00233    unsigned i;
00234 
00235    (void) memcpy(draw->plane, vs->plane, sizeof(float) * 4 * vs->nr_planes);
00236    draw->nr_planes = vs->nr_planes;
00237    draw->vertex_fetch.nr_attrs = vs->nr_attrs;
00238 
00239    for (i = 0; i < vs->num_elts; i += 4) {
00240       const unsigned batch_size = MIN2(vs->num_elts - i, 4);
00241 
00242       run_vertex_program(draw, & vs->elts[i], batch_size, &vs->vOut[i]);
00243    }
00244 }
spu_vertex_shader.c