Gallium3D: gallium/drivers/cell/ppu/cell_vertex_fetch.c

void cell_update_vertex_fetch ( struct draw_context * draw )

Definition at line 261 of file cell_vertex_fetch.c.

References align(), assert, cell_context::attrib_fetch, cell_context::attrib_fetch_offsets, draw_context::driver_private, emit_fetch(), offset(), PIPE_MAX_ATTRIBS, spe_allocate_register(), spe_bi(), spe_init_func(), SPE_INST_SIZE, pipe_vertex_element::src_format, and draw_context::vertex_element.

00262 {
         /* Generates the SPE vertex-attribute fetch code for a draw context and
          * records, for every attribute, the offset of its fetch routine in
          * cell->attrib_fetch_offsets.  One routine is emitted per distinct
          * src_format; attributes that share a format share an entry point.
          *
          * draw: draw context whose driver_private is cast to the cell_context.
          *
          * The whole implementation sits inside "#if 0", so as listed here the
          * compiled function body reduces to assert(0).
          */
00263 #if 0
00264    struct cell_context *const cell =
00265        (struct cell_context *) draw->driver_private;
00266    struct spe_function *p = &cell->attrib_fetch;
00267    unsigned function_index[PIPE_MAX_ATTRIBS];
00268    unsigned unique_attr_formats;
00269    int out_ptr;
00270    int in_ptr;
00271    int shuf_ptr;
00272    unsigned i;
00273    unsigned j;
00274 
00275 
00276    /* Determine how many unique input attribute formats there are.  At the
00277     * same time, store the index of the lowest numbered attribute that has
00278     * the same format as any non-unique format.
00279     */
         /* NOTE(review): function_index has PIPE_MAX_ATTRIBS entries and nothing
          * visible here checks nr_attrs against that bound -- confirm that the
          * caller guarantees it.
          */
00280    unique_attr_formats = 1;
00281    function_index[0] = 0;
00282    for (i = 1; i < draw->vertex_fetch.nr_attrs; i++) {
00283       const enum pipe_format curr_fmt = draw->vertex_element[i].src_format;
00284 
            /* Linear search of the earlier attributes for the same format. */
00285       for (j = 0; j < i; j++) {
00286          if (curr_fmt == draw->vertex_element[j].src_format) {
00287             break;
00288          }
00289       }
00290       
            /* j == i means the search ran to completion: no earlier attribute
             * uses this format, so it is a new unique format.
             */
00291       if (j == i) {
00292          unique_attr_formats++;
00293       }
00294 
            /* Either i itself (unique) or the first attribute with this format. */
00295       function_index[i] = j;
00296    }
00297 
00298 
00299    /* Each fetch function can be a maximum of 34 instructions (note: this is
00300     * actually a slight over-estimate).
00301     */
00302    spe_init_func(p, 34 * SPE_INST_SIZE * unique_attr_formats);
00303 
00304 
00305    /* Allocate registers for the function's input parameters.
00306     */
         /* Registers 3, 4 and 5 are requested explicitly for the output pointer,
          * input pointer and shuffle-table pointer respectively.
          */
00307    out_ptr = spe_allocate_register(p, 3);
00308    in_ptr = spe_allocate_register(p, 4);
00309    shuf_ptr = spe_allocate_register(p, 5);
00310 
00311 
00312    /* Generate code for the individual attribute fetch functions.
00313     */
00314    for (i = 0; i < draw->vertex_fetch.nr_attrs; i++) {
00315       unsigned offset;
00316 
            /* Only the first attribute of each format gets code emitted. */
00317       if (function_index[i] == i) {
               /* Entry offset = distance from p->store to p->csr at the moment
                * emission of this routine starts (presumably the start of the
                * code buffer and the current write position -- confirm against
                * struct spe_function).
                */
00318          cell->attrib_fetch_offsets[i] = (unsigned) ((void *) p->csr 
00319                                                      - (void *) p->store);
00320 
               /* Every routine starts reading at input slot 0; emit_fetch
                * advances the local offset, which is then discarded.
                */
00321          offset = 0;
00322          emit_fetch(p, in_ptr, &offset, out_ptr, shuf_ptr,
00323                     draw->vertex_element[i].src_format);
               /* Presumably the routine's return (branch indirect through
                * register 0) -- confirm against the spe_bi emitter.
                */
00324          spe_bi(p, 0, 0, 0);
00325 
00326          /* Round up to the next 16-byte boundary.
00327           */
               /* NOTE(review): both the test and the adjustment use p->store,
                * while the entry offsets above are measured from p->store to
                * p->csr.  Adding the low four address bits (rather than 16 minus
                * them) produces a 16-byte-aligned value only when those bits
                * equal 8.  Presumably the intent was to pad p->csr up to the next
                * multiple of 16 -- confirm before enabling this code.
                */
00328          if ((((unsigned) p->store) & 0x0f) != 0) {
00329             const unsigned align = ((unsigned) p->store) & 0x0f;
00330             p->store = (uint32_t *) (((void *) p->store) + align);
00331          }
00332       } else {
00333          /* Use the same function entry-point as a previously seen attribute
00334           * with the same format.
00335           */
00336          cell->attrib_fetch_offsets[i] = 
00337              cell->attrib_fetch_offsets[function_index[i]];
00338       }
00339    }
00340 #else
         /* Active build path: the function is not implemented and must not be
          * called.
          */
00341    assert(0);
00342 #endif
00343 }

static void emit_fetch	(	struct spe_function *	p,
		unsigned	in_ptr,
		unsigned *	offset,
		unsigned	out_ptr,
		unsigned	shuf_ptr,
		enum pipe_format	format
	)			`[static]`

Definition at line 149 of file cell_vertex_fetch.c.

References assert, emit_matrix_transpose(), pf_size_w, pf_size_x, pf_size_y, pf_size_z, pf_type, PIPE_FORMAT_TYPE_FLOAT, PIPE_FORMAT_TYPE_SNORM, PIPE_FORMAT_TYPE_SSCALED, PIPE_FORMAT_TYPE_UNORM, PIPE_FORMAT_TYPE_USCALED, spe_allocate_available_register(), spe_csflt(), spe_cuflt(), spe_fm(), spe_il(), spe_ilhu(), spe_iohl(), spe_lqd(), spe_release_register(), spe_shufb(), and spe_stqd().

00153 {
         /* Appends to p the code for one attribute fetch routine: four spe_lqd
          * loads from in_ptr, an optional per-size shuffle, an optional
          * integer-to-float conversion, a transpose that stores `count`
          * components to out_ptr, and constant stores for the components the
          * format does not supply.
          *
          * p        function the instructions are appended to
          * in_ptr   register used as the base of the loads
          * offset   in/out: offset[0] is the first load slot and is advanced by 4
          * out_ptr  register used as the base of the stores
          * shuf_ptr register used as the base of the shuffle-pattern loads
          *          (slots 1 and 2 here; emit_matrix_transpose reads 3 and 4)
          * format   source format of the attribute
          */

         /* Number of components present: one per non-zero x/y/z/w size. */
00154    const unsigned count = (pf_size_x(format) != 0) + (pf_size_y(format) != 0)
00155        + (pf_size_z(format) != 0) + (pf_size_w(format) != 0);
00156    const unsigned type = pf_type(format);
         /* Size of the x component only; the switch below accepts 1, 2 or 4
          * (presumably bytes, as the name says -- confirm pf_size_x units).
          */
00157    const unsigned bytes = pf_size_x(format);
00158 
00159    int v0 = spe_allocate_available_register(p);
00160    int v1 = spe_allocate_available_register(p);
00161    int v2 = spe_allocate_available_register(p);
00162    int v3 = spe_allocate_available_register(p);
00163    int tmp = spe_allocate_available_register(p);
         /* -1 marks "not allocated"; checked before release at the end. */
00164    int float_zero = -1;
00165    int float_one = -1;
00166    float scale_signed = 0.0;
00167    float scale_unsigned = 0.0;
00168 
         /* Load four consecutive slots starting at offset[0], then advance the
          * caller's offset past them.
          */
00169    spe_lqd(p, v0, in_ptr, 0 + offset[0]);
00170    spe_lqd(p, v1, in_ptr, 1 + offset[0]);
00171    spe_lqd(p, v2, in_ptr, 2 + offset[0]);
00172    spe_lqd(p, v3, in_ptr, 3 + offset[0]);
00173    offset[0] += 4;
00174    
         /* Pick the normalisation factors for the component size and, for 1-
          * and 2-sized components, shuffle each vector with itself using the
          * pattern loaded from shuf_ptr slot 1 or 2.  Size 4 needs no shuffle.
          */
00175    switch (bytes) {
00176    case 1:
00177       scale_signed = 1.0f / 127.0f;
00178       scale_unsigned = 1.0f / 255.0f;
00179       spe_lqd(p, tmp, shuf_ptr, 1);
00180       spe_shufb(p, v0, v0, v0, tmp);
00181       spe_shufb(p, v1, v1, v1, tmp);
00182       spe_shufb(p, v2, v2, v2, tmp);
00183       spe_shufb(p, v3, v3, v3, tmp);
00184       break;
00185    case 2:
00186       scale_signed = 1.0f / 32767.0f;
00187       scale_unsigned = 1.0f / 65535.0f;
00188       spe_lqd(p, tmp, shuf_ptr, 2);
00189       spe_shufb(p, v0, v0, v0, tmp);
00190       spe_shufb(p, v1, v1, v1, tmp);
00191       spe_shufb(p, v2, v2, v2, tmp);
00192       spe_shufb(p, v3, v3, v3, tmp);
00193       break;
00194    case 4:
00195       scale_signed = 1.0f / 2147483647.0f;
00196       scale_unsigned = 1.0f / 4294967295.0f;
00197       break;
00198    default:
            /* Any other component size is unsupported. */
00199       assert(0);
00200       break;
00201    }
00202 
         /* Convert to float according to the format type.
          *
          * NOTE(review): "(unsigned) scale_unsigned" / "(unsigned) scale_signed"
          * is a C value conversion, not a reinterpretation of the float's bits.
          * Every scale assigned above is below 1.0, so the conversion yields 0
          * and both immediates passed to spe_ilhu/spe_iohl are 0.  Presumably
          * the IEEE-754 bit pattern of the scale was intended -- confirm.
          *
          * NOTE(review): only v0 is converted and scaled; v1, v2 and v3 reach
          * the transpose unconverted -- confirm whether that is intended.
          */
00203    switch (type) {
00204    case PIPE_FORMAT_TYPE_FLOAT:
00205       break;
00206    case PIPE_FORMAT_TYPE_UNORM:
00207       spe_ilhu(p, tmp, ((unsigned) scale_unsigned) >> 16);
00208       spe_iohl(p, tmp, ((unsigned) scale_unsigned) & 0x0ffff);
00209       spe_cuflt(p, v0, v0, 0);
00210       spe_fm(p, v0, v0, tmp);
00211       break;
00212    case PIPE_FORMAT_TYPE_SNORM:
00213       spe_ilhu(p, tmp, ((unsigned) scale_signed) >> 16);
00214       spe_iohl(p, tmp, ((unsigned) scale_signed) & 0x0ffff);
00215       spe_csflt(p, v0, v0, 0);
00216       spe_fm(p, v0, v0, tmp);
00217       break;
00218    case PIPE_FORMAT_TYPE_USCALED:
00219       spe_cuflt(p, v0, v0, 0);
00220       break;
00221    case PIPE_FORMAT_TYPE_SSCALED:
00222       spe_csflt(p, v0, v0, 0);
00223       break;
00224    }
00225 
00226 
         /* Prepare the constants used to fill components the format lacks:
          * float_one whenever fewer than four components are present, and
          * float_zero as well when fewer than three are.
          */
00227    if (count < 4) {
00228       float_one = spe_allocate_available_register(p);
            /* Immediate 1 followed by spe_cuflt; presumably yields 1.0f. */
00229       spe_il(p, float_one, 1);
00230       spe_cuflt(p, float_one, float_one, 0);
00231       
00232       if (count < 3) {
00233          float_zero = spe_allocate_available_register(p);
00234          spe_il(p, float_zero, 0);
00235       }
00236    }
00237 
         /* tmp is released before the transpose, which allocates its own
          * temporaries.
          */
00238    spe_release_register(p, tmp);
00239 
         /* Stores `count` components to out_ptr slots 0..count-1 and releases
          * v0..v3 (see the release calls at the end of emit_matrix_transpose).
          */
00240    emit_matrix_transpose(p, v0, v1, v2, v3, out_ptr, shuf_ptr, count);
00241 
         /* Fill the remaining slots.  The cases fall through, which appears
          * intentional: count 1 stores zero, zero, one to slots 1..3; count 2
          * stores zero, one to slots 2..3; count 3 stores one to slot 3.
          */
00242    switch (count) {
00243    case 1:
00244       spe_stqd(p, float_zero, out_ptr, 1);
00245    case 2:
00246       spe_stqd(p, float_zero, out_ptr, 2);
00247    case 3:
00248       spe_stqd(p, float_one, out_ptr, 3);
00249    }
00250 
         /* Release the constant registers only if they were allocated. */
00251    if (float_zero != -1) {
00252       spe_release_register(p, float_zero);
00253    }
00254 
00255    if (float_one != -1) {
00256       spe_release_register(p, float_one);
00257    }
00258 }

static void emit_matrix_transpose	(	struct spe_function *	p,
		unsigned	row0,
		unsigned	row1,
		unsigned	row2,
		unsigned	row3,
		unsigned	dest_ptr,
		unsigned	shuf_ptr,
		unsigned	count
	)			`[static]`

Emit a 4x4 matrix transpose operation.

Parameters:

	p	Function that the transpose operation is to be appended to
	row0	Register containing row 0 of the source matrix
	row1	Register containing row 1 of the source matrix
	row2	Register containing row 2 of the source matrix
	row3	Register containing row 3 of the source matrix
	dest_ptr	Register containing the address of the destination matrix
	shuf_ptr	Register containing the address of the shuffled data
	count	Number of columns to actually be written to the destination

Note: This function assumes that the registers named by row0, row1, row2, and row3 are scratch and can be modified by the generated code. Furthermore, these registers will be released, via calls to spe_release_register, by this function.
This function requires that four temporary registers are available on entry.

Definition at line 59 of file cell_vertex_fetch.c.

References spe_allocate_available_register(), spe_lqd(), spe_release_register(), spe_shufb(), and spe_stqd().

00063 {
         /* Appends to p a sequence of spe_shufb operations that combine the
          * four row registers into four column registers, then stores the first
          * `count` columns to dest_ptr slots 0..count-1.  The shuffle patterns
          * are loaded from shuf_ptr slots 3 and 4.  Four registers are
          * allocated here; the four row registers are reused as temporaries.
          */
00064    int shuf_hi = spe_allocate_available_register(p);
00065    int shuf_lo = spe_allocate_available_register(p);
00066    int t1 = spe_allocate_available_register(p);
00067    int t2 = spe_allocate_available_register(p);
00068    int t3;
00069    int t4;
00070    int col0;
00071    int col1;
00072    int col2;
00073    int col3;
00074 
00075 
         /* First pass: combine rows 0 and 2 with each pattern. */
00076    spe_lqd(p, shuf_hi, shuf_ptr, 3);
00077    spe_lqd(p, shuf_lo, shuf_ptr, 4);
00078    spe_shufb(p, t1, row0, row2, shuf_hi);
00079    spe_shufb(p, t2, row0, row2, shuf_lo);
00080 
00081 
00082    /* row0 and row2 are now no longer needed.  Re-use those registers as
00083     * temporaries.
00084     */
00085    t3 = row0;
00086    t4 = row2;
00087 
         /* Same combination for rows 1 and 3. */
00088    spe_shufb(p, t3, row1, row3, shuf_hi);
00089    spe_shufb(p, t4, row1, row3, shuf_lo);
00090 
00091 
00092    /* row1 and row3 are now no longer needed.  Re-use those registers as
00093     * temporaries.
00094     */
00095    col0 = row1;
00096    col1 = row3;
00097 
         /* Second pass: column 0 is always produced; the remaining columns are
          * produced only when count asks for them.
          */
00098    spe_shufb(p, col0, t1, t3, shuf_hi);
00099    if (count > 1) {
00100       spe_shufb(p, col1, t1, t3, shuf_lo);
00101    }
00102 
00103    /* t1 and t3 are now no longer needed.  Re-use those registers as
00104     * temporaries.
00105     */
00106    col2 = t1;
00107    col3 = t3;
00108 
00109    if (count > 2) {
00110       spe_shufb(p, col2, t2, t4, shuf_hi);
00111    }
00112 
00113    if (count > 3) {
00114       spe_shufb(p, col3, t2, t4, shuf_lo);
00115    }
00116 
00117 
00118    /* Store the results.  Remember that the stqd instruction is encoded using
00119     * the qword offset (stand-alone assemblers do the byte-offset to
00120     * qword-offset conversion for you), so the byte-offset needs to be divided by
00121     * 16.
00122     */
         /* The cases fall through on purpose: entering at `count` stores columns
          * count-1 down to 0.  A count outside 1..4 stores nothing.
          */
00123    switch (count) {
00124    case 4:
00125       spe_stqd(p, col3, dest_ptr, 3);
00126    case 3:
00127       spe_stqd(p, col2, dest_ptr, 2);
00128    case 2:
00129       spe_stqd(p, col1, dest_ptr, 1);
00130    case 1:
00131       spe_stqd(p, col0, dest_ptr, 0);
00132    }
00133 
00134 
00135    /* Release all of the temporary registers used.
00136     */
         /* Through the aliases above this covers all eight registers exactly
          * once: col0 = row1, col1 = row3, col2 = t1, col3 = t3 = row0, and
          * t4 = row2, plus shuf_hi, shuf_lo and t2.
          */
00137    spe_release_register(p, col0);
00138    spe_release_register(p, col1);
00139    spe_release_register(p, col2);
00140    spe_release_register(p, col3);
00141    spe_release_register(p, shuf_hi);
00142    spe_release_register(p, shuf_lo);
00143    spe_release_register(p, t2);
00144    spe_release_register(p, t4);
00145 }

cell_vertex_fetch.c File Reference

Functions

Function Documentation


Functions
static void	emit_matrix_transpose (struct spe_function *p, unsigned row0, unsigned row1, unsigned row2, unsigned row3, unsigned dest_ptr, unsigned shuf_ptr, unsigned count)
	Emit a 4x4 matrix transpose operation.
static void	emit_fetch (struct spe_function *p, unsigned in_ptr, unsigned *offset, unsigned out_ptr, unsigned shuf_ptr, enum pipe_format format)
void	cell_update_vertex_fetch (struct draw_context *draw)