cell_vertex_fetch.c

Go to the documentation of this file.
00001 /*
00002  * (C) Copyright IBM Corporation 2008
00003  * All Rights Reserved.
00004  *
00005  * Permission is hereby granted, free of charge, to any person obtaining a
00006  * copy of this software and associated documentation files (the "Software"),
00007  * to deal in the Software without restriction, including without limitation
00008  * on the rights to use, copy, modify, merge, publish, distribute, sub
00009  * license, and/or sell copies of the Software, and to permit persons to whom
00010  * the Software is furnished to do so, subject to the following conditions:
00011  *
00012  * The above copyright notice and this permission notice (including the next
00013  * paragraph) shall be included in all copies or substantial portions of the
00014  * Software.
00015  *
00016  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
00017  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
00018  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.  IN NO EVENT SHALL
00019  * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
00020  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
00021  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
00022  * USE OR OTHER DEALINGS IN THE SOFTWARE.
00023  */
00024 
00025 #include <inttypes.h>
00026 #include "pipe/p_defines.h"
00027 #include "pipe/p_context.h"
00028 #include "pipe/p_format.h"
00029 
00030 #include "../auxiliary/draw/draw_context.h"
00031 #include "../auxiliary/draw/draw_private.h"
00032 
00033 #include "cell_context.h"
00034 #include "rtasm/rtasm_ppc_spe.h"
00035 
00036 
/**
 * Emit SPE code that transposes a 4x4 matrix held in four registers and
 * stores the leading columns of the result to memory.
 *
 * \param p         Function the generated instructions are appended to
 * \param row0      Register holding row 0 of the source matrix
 * \param row1      Register holding row 1 of the source matrix
 * \param row2      Register holding row 2 of the source matrix
 * \param row3      Register holding row 3 of the source matrix
 * \param dest_ptr  Register used as the base address for the stores
 * \param shuf_ptr  Register used as the base address of the shuffle
 *                  patterns; quadwords 3 and 4 are loaded from it
 * \param count     Number of result columns to store.  Values 1 through 4
 *                  store that many quadwords; any other value stores none.
 *
 * \note
 * The registers named by \c row0 through \c row3 are overwritten by the
 * generated code and are released by this function, together with the four
 * temporaries it allocates on entry.
 */
static void
emit_matrix_transpose(struct spe_function *p,
                      unsigned row0, unsigned row1, unsigned row2,
                      unsigned row3, unsigned dest_ptr,
                      unsigned shuf_ptr, unsigned count)
{
   const int mask_hi = spe_allocate_available_register(p);
   const int mask_lo = spe_allocate_available_register(p);
   const int even_hi = spe_allocate_available_register(p);
   const int even_lo = spe_allocate_available_register(p);

   /* Once rows 0 and 2 have been interleaved their registers are free, so
    * they receive the interleave of rows 1 and 3.
    */
   const int odd_hi = row0;
   const int odd_lo = row2;

   /* Registers that end up holding the result columns.  Each one re-uses a
    * register whose previous contents are dead by the time it is written.
    */
   int column[4];
   int i;

   column[0] = row1;
   column[1] = row3;
   column[2] = even_hi;
   column[3] = odd_hi;


   /* First pass: interleave rows 0/2, then rows 1/3.
    */
   spe_lqd(p, mask_hi, shuf_ptr, 3);
   spe_lqd(p, mask_lo, shuf_ptr, 4);
   spe_shufb(p, even_hi, row0, row2, mask_hi);
   spe_shufb(p, even_lo, row0, row2, mask_lo);

   spe_shufb(p, odd_hi, row1, row3, mask_hi);
   spe_shufb(p, odd_lo, row1, row3, mask_lo);


   /* Second pass: build the columns.  Column 0 is always produced; the
    * others are only produced when they are going to be stored.
    */
   spe_shufb(p, column[0], even_hi, odd_hi, mask_hi);

   if (count >= 2) {
      spe_shufb(p, column[1], even_hi, odd_hi, mask_lo);
   }

   if (count >= 3) {
      spe_shufb(p, column[2], even_lo, odd_lo, mask_hi);
   }

   if (count >= 4) {
      spe_shufb(p, column[3], even_lo, odd_lo, mask_lo);
   }


   /* Store the results, highest column first.  The stqd instruction is
    * encoded using the qword offset (stand-alone assemblers do the
    * byte-offset to qword-offset conversion for you), so the offset passed
    * here is the byte offset divided by 16.
    */
   if (count <= 4) {
      for (i = (int) count - 1; i >= 0; i--) {
         spe_stqd(p, column[i], dest_ptr, i);
      }
   }


   /* Release every register used: the four columns first, then the two
    * shuffle patterns and the two remaining intermediates.
    */
   for (i = 0; i < 4; i++) {
      spe_release_register(p, column[i]);
   }

   spe_release_register(p, mask_hi);
   spe_release_register(p, mask_lo);
   spe_release_register(p, even_lo);
   spe_release_register(p, odd_lo);
}
00146 
00147 
00148 static void
00149 emit_fetch(struct spe_function *p,
00150            unsigned in_ptr, unsigned *offset,
00151            unsigned out_ptr, unsigned shuf_ptr,
00152            enum pipe_format format)
00153 {
00154    const unsigned count = (pf_size_x(format) != 0) + (pf_size_y(format) != 0)
00155        + (pf_size_z(format) != 0) + (pf_size_w(format) != 0);
00156    const unsigned type = pf_type(format);
00157    const unsigned bytes = pf_size_x(format);
00158 
00159    int v0 = spe_allocate_available_register(p);
00160    int v1 = spe_allocate_available_register(p);
00161    int v2 = spe_allocate_available_register(p);
00162    int v3 = spe_allocate_available_register(p);
00163    int tmp = spe_allocate_available_register(p);
00164    int float_zero = -1;
00165    int float_one = -1;
00166    float scale_signed = 0.0;
00167    float scale_unsigned = 0.0;
00168 
00169    spe_lqd(p, v0, in_ptr, 0 + offset[0]);
00170    spe_lqd(p, v1, in_ptr, 1 + offset[0]);
00171    spe_lqd(p, v2, in_ptr, 2 + offset[0]);
00172    spe_lqd(p, v3, in_ptr, 3 + offset[0]);
00173    offset[0] += 4;
00174    
00175    switch (bytes) {
00176    case 1:
00177       scale_signed = 1.0f / 127.0f;
00178       scale_unsigned = 1.0f / 255.0f;
00179       spe_lqd(p, tmp, shuf_ptr, 1);
00180       spe_shufb(p, v0, v0, v0, tmp);
00181       spe_shufb(p, v1, v1, v1, tmp);
00182       spe_shufb(p, v2, v2, v2, tmp);
00183       spe_shufb(p, v3, v3, v3, tmp);
00184       break;
00185    case 2:
00186       scale_signed = 1.0f / 32767.0f;
00187       scale_unsigned = 1.0f / 65535.0f;
00188       spe_lqd(p, tmp, shuf_ptr, 2);
00189       spe_shufb(p, v0, v0, v0, tmp);
00190       spe_shufb(p, v1, v1, v1, tmp);
00191       spe_shufb(p, v2, v2, v2, tmp);
00192       spe_shufb(p, v3, v3, v3, tmp);
00193       break;
00194    case 4:
00195       scale_signed = 1.0f / 2147483647.0f;
00196       scale_unsigned = 1.0f / 4294967295.0f;
00197       break;
00198    default:
00199       assert(0);
00200       break;
00201    }
00202 
00203    switch (type) {
00204    case PIPE_FORMAT_TYPE_FLOAT:
00205       break;
00206    case PIPE_FORMAT_TYPE_UNORM:
00207       spe_ilhu(p, tmp, ((unsigned) scale_unsigned) >> 16);
00208       spe_iohl(p, tmp, ((unsigned) scale_unsigned) & 0x0ffff);
00209       spe_cuflt(p, v0, v0, 0);
00210       spe_fm(p, v0, v0, tmp);
00211       break;
00212    case PIPE_FORMAT_TYPE_SNORM:
00213       spe_ilhu(p, tmp, ((unsigned) scale_signed) >> 16);
00214       spe_iohl(p, tmp, ((unsigned) scale_signed) & 0x0ffff);
00215       spe_csflt(p, v0, v0, 0);
00216       spe_fm(p, v0, v0, tmp);
00217       break;
00218    case PIPE_FORMAT_TYPE_USCALED:
00219       spe_cuflt(p, v0, v0, 0);
00220       break;
00221    case PIPE_FORMAT_TYPE_SSCALED:
00222       spe_csflt(p, v0, v0, 0);
00223       break;
00224    }
00225 
00226 
00227    if (count < 4) {
00228       float_one = spe_allocate_available_register(p);
00229       spe_il(p, float_one, 1);
00230       spe_cuflt(p, float_one, float_one, 0);
00231       
00232       if (count < 3) {
00233          float_zero = spe_allocate_available_register(p);
00234          spe_il(p, float_zero, 0);
00235       }
00236    }
00237 
00238    spe_release_register(p, tmp);
00239 
00240    emit_matrix_transpose(p, v0, v1, v2, v3, out_ptr, shuf_ptr, count);
00241 
00242    switch (count) {
00243    case 1:
00244       spe_stqd(p, float_zero, out_ptr, 1);
00245    case 2:
00246       spe_stqd(p, float_zero, out_ptr, 2);
00247    case 3:
00248       spe_stqd(p, float_one, out_ptr, 3);
00249    }
00250 
00251    if (float_zero != -1) {
00252       spe_release_register(p, float_zero);
00253    }
00254 
00255    if (float_one != -1) {
00256       spe_release_register(p, float_one);
00257    }
00258 }
00259 
00260 
/**
 * Regenerate the SPE attribute-fetch functions for a draw context.
 *
 * \param draw  Draw context whose vertex elements describe the attributes
 *
 * The implementation is currently compiled out with "#if 0"; as built, the
 * function only trips an assertion when it is called.
 *
 * The disabled implementation emits one fetch function per distinct source
 * format and records, for every attribute, the byte offset of the function
 * to use in \c cell->attrib_fetch_offsets.
 */
void cell_update_vertex_fetch(struct draw_context *draw)
{
#if 0
   struct cell_context *const cell =
       (struct cell_context *) draw->driver_private;
   struct spe_function *p = &cell->attrib_fetch;
   unsigned function_index[PIPE_MAX_ATTRIBS];
   unsigned unique_attr_formats;
   int out_ptr;
   int in_ptr;
   int shuf_ptr;
   unsigned i;
   unsigned j;


   /* Determine how many unique input attribute formats there are.  At the
    * same time, store for every attribute the index of the lowest numbered
    * attribute that has the same format (its own index if it is the first).
    */
   unique_attr_formats = 1;
   function_index[0] = 0;
   for (i = 1; i < draw->vertex_fetch.nr_attrs; i++) {
      const enum pipe_format curr_fmt = draw->vertex_element[i].src_format;

      for (j = 0; j < i; j++) {
         if (curr_fmt == draw->vertex_element[j].src_format) {
            break;
         }
      }

      if (j == i) {
         unique_attr_formats++;
      }

      function_index[i] = j;
   }


   /* Each fetch function can be a maximum of 34 instructions (note: this is
    * actually a slight over-estimate).
    */
   spe_init_func(p, 34 * SPE_INST_SIZE * unique_attr_formats);


   /* Allocate registers for the function's input parameters.
    */
   out_ptr = spe_allocate_register(p, 3);
   in_ptr = spe_allocate_register(p, 4);
   shuf_ptr = spe_allocate_register(p, 5);


   /* Generate code for the individual attribute fetch functions.
    */
   for (i = 0; i < draw->vertex_fetch.nr_attrs; i++) {
      unsigned offset;

      if (function_index[i] == i) {
         /* The entry point is recorded as a byte offset from the start of
          * the code buffer.
          */
         cell->attrib_fetch_offsets[i] =
             (unsigned) ((char *) p->csr - (char *) p->store);

         offset = 0;
         emit_fetch(p, in_ptr, &offset, out_ptr, shuf_ptr,
                    draw->vertex_element[i].src_format);
         spe_bi(p, 0, 0, 0);

         /* Round the write cursor up to the next 16-byte boundary so that
          * the next function starts aligned.  The buffer base (p->store)
          * must stay put because the recorded offsets are relative to it.
          *
          * NOTE(review): assumes p->csr is the uint32_t write cursor that
          * pairs with p->store; confirm against struct spe_function before
          * re-enabling this code.
          */
         {
            const uintptr_t misalign = ((uintptr_t) p->csr) & 0x0f;

            if (misalign != 0) {
               p->csr = (uint32_t *) ((char *) p->csr + (16 - misalign));
            }
         }
      } else {
         /* Use the same function entry-point as a previously seen attribute
          * with the same format.
          */
         cell->attrib_fetch_offsets[i] =
             cell->attrib_fetch_offsets[function_index[i]];
      }
   }
#else
   /* Not implemented in this build; see the disabled code above. */
   (void) draw;
   assert(0);
#endif
}

Generated on Tue Sep 29 06:25:16 2009 for Gallium3D by  doxygen 1.5.4