Go to the source code of this file.
Functions | |
static void | emit_matrix_transpose (struct spe_function *p, unsigned row0, unsigned row1, unsigned row2, unsigned row3, unsigned dest_ptr, unsigned shuf_ptr, unsigned count) |
Emit a 4x4 matrix transpose operation. | |
static void | emit_fetch (struct spe_function *p, unsigned in_ptr, unsigned *offset, unsigned out_ptr, unsigned shuf_ptr, enum pipe_format format) |
void | cell_update_vertex_fetch (struct draw_context *draw) |
void cell_update_vertex_fetch | ( | struct draw_context * | draw | ) |
Definition at line 261 of file cell_vertex_fetch.c.
References align(), assert, cell_context::attrib_fetch, cell_context::attrib_fetch_offsets, draw_context::driver_private, emit_fetch(), offset(), PIPE_MAX_ATTRIBS, spe_allocate_register(), spe_bi(), spe_init_func(), SPE_INST_SIZE, pipe_vertex_element::src_format, and draw_context::vertex_element.
/**
 * Regenerate the SPE vertex-attribute fetch functions for a draw context.
 *
 * NOTE: the implementation is currently compiled out with "#if 0"; as built,
 * calling this function only trips assert(0).
 *
 * The disabled implementation does the following:
 *  - groups the vertex elements by source format, so that elements sharing a
 *    format share one generated fetch function;
 *  - emits one fetch function per unique format into cell->attrib_fetch,
 *    each terminated by a "bi" instruction;
 *  - records, for every attribute, the byte offset of its fetch function
 *    from the start of the code buffer in cell->attrib_fetch_offsets[].
 *
 * \param draw  Draw context; its driver_private field is used as the
 *              owning struct cell_context.
 */
void
cell_update_vertex_fetch(struct draw_context *draw)
{
#if 0
   struct cell_context *const cell =
      (struct cell_context *) draw->driver_private;
   struct spe_function *p = &cell->attrib_fetch;
   unsigned function_index[PIPE_MAX_ATTRIBS];
   unsigned unique_attr_formats;
   unsigned bytes_per_function;
   int out_ptr;
   int in_ptr;
   int shuf_ptr;
   unsigned i;
   unsigned j;


   /* function_index[] is indexed by attribute number below. */
   assert(draw->vertex_fetch.nr_attrs <= PIPE_MAX_ATTRIBS);


   /* Determine how many unique input attribute formats there are.  At the
    * same time, store the index of the lowest numbered attribute that has
    * the same format as any non-unique format.
    */
   unique_attr_formats = 1;
   function_index[0] = 0;
   for (i = 1; i < draw->vertex_fetch.nr_attrs; i++) {
      const enum pipe_format curr_fmt = draw->vertex_element[i].src_format;

      for (j = 0; j < i; j++) {
         if (curr_fmt == draw->vertex_element[j].src_format) {
            break;
         }
      }

      if (j == i) {
         unique_attr_formats++;
      }

      function_index[i] = j;
   }


   /* Each fetch function can be a maximum of 34 instructions (note: this is
    * actually a slight over-estimate).  Every function is padded out to a
    * 16-byte boundary below, so round the per-function budget up to a
    * multiple of 16 bytes as well.
    * NOTE(review): this assumes spe_init_func() returns a 16-byte aligned
    * buffer -- confirm.
    */
   bytes_per_function = (34 * SPE_INST_SIZE + 15) & ~15u;
   spe_init_func(p, bytes_per_function * unique_attr_formats);


   /* Allocate registers for the function's input parameters.
    */
   out_ptr = spe_allocate_register(p, 3);
   in_ptr = spe_allocate_register(p, 4);
   shuf_ptr = spe_allocate_register(p, 5);


   /* Generate code for the individual attribute fetch functions.
    */
   for (i = 0; i < draw->vertex_fetch.nr_attrs; i++) {
      unsigned offset;

      if (function_index[i] == i) {
         /* Entry point = distance of the write cursor from the buffer base.
          */
         cell->attrib_fetch_offsets[i] =
            (unsigned) ((char *) p->csr - (char *) p->store);

         offset = 0;
         emit_fetch(p, in_ptr, &offset, out_ptr, shuf_ptr,
                    draw->vertex_element[i].src_format);
         spe_bi(p, 0, 0, 0);

         /* Round the write cursor up to the next 16-byte boundary so that
          * the next function starts aligned.  The base pointer (p->store)
          * must not move: the recorded offsets are relative to it.  The
          * padding follows the "bi" emitted above, so it is left unwritten.
          */
         {
            const unsigned misalign =
               (unsigned) (((unsigned long) p->csr) & 0x0f);

            if (misalign != 0) {
               p->csr = (uint32_t *) ((char *) p->csr + (16 - misalign));
            }
         }
      } else {
         /* Use the same function entry-point as a previously seen attribute
          * with the same format.
          */
         cell->attrib_fetch_offsets[i] =
            cell->attrib_fetch_offsets[function_index[i]];
      }
   }
#else
   assert(0);
#endif
}
static void emit_fetch | ( | struct spe_function * | p, | |
unsigned | in_ptr, | |||
unsigned * | offset, | |||
unsigned | out_ptr, | |||
unsigned | shuf_ptr, | |||
enum pipe_format | format | |||
) | [static] |
Definition at line 149 of file cell_vertex_fetch.c.
References assert, emit_matrix_transpose(), pf_size_w, pf_size_x, pf_size_y, pf_size_z, pf_type, PIPE_FORMAT_TYPE_FLOAT, PIPE_FORMAT_TYPE_SNORM, PIPE_FORMAT_TYPE_SSCALED, PIPE_FORMAT_TYPE_UNORM, PIPE_FORMAT_TYPE_USCALED, spe_allocate_available_register(), spe_csflt(), spe_cuflt(), spe_fm(), spe_il(), spe_ilhu(), spe_iohl(), spe_lqd(), spe_release_register(), spe_shufb(), and spe_stqd().
00153 { 00154 const unsigned count = (pf_size_x(format) != 0) + (pf_size_y(format) != 0) 00155 + (pf_size_z(format) != 0) + (pf_size_w(format) != 0); 00156 const unsigned type = pf_type(format); 00157 const unsigned bytes = pf_size_x(format); 00158 00159 int v0 = spe_allocate_available_register(p); 00160 int v1 = spe_allocate_available_register(p); 00161 int v2 = spe_allocate_available_register(p); 00162 int v3 = spe_allocate_available_register(p); 00163 int tmp = spe_allocate_available_register(p); 00164 int float_zero = -1; 00165 int float_one = -1; 00166 float scale_signed = 0.0; 00167 float scale_unsigned = 0.0; 00168 00169 spe_lqd(p, v0, in_ptr, 0 + offset[0]); 00170 spe_lqd(p, v1, in_ptr, 1 + offset[0]); 00171 spe_lqd(p, v2, in_ptr, 2 + offset[0]); 00172 spe_lqd(p, v3, in_ptr, 3 + offset[0]); 00173 offset[0] += 4; 00174 00175 switch (bytes) { 00176 case 1: 00177 scale_signed = 1.0f / 127.0f; 00178 scale_unsigned = 1.0f / 255.0f; 00179 spe_lqd(p, tmp, shuf_ptr, 1); 00180 spe_shufb(p, v0, v0, v0, tmp); 00181 spe_shufb(p, v1, v1, v1, tmp); 00182 spe_shufb(p, v2, v2, v2, tmp); 00183 spe_shufb(p, v3, v3, v3, tmp); 00184 break; 00185 case 2: 00186 scale_signed = 1.0f / 32767.0f; 00187 scale_unsigned = 1.0f / 65535.0f; 00188 spe_lqd(p, tmp, shuf_ptr, 2); 00189 spe_shufb(p, v0, v0, v0, tmp); 00190 spe_shufb(p, v1, v1, v1, tmp); 00191 spe_shufb(p, v2, v2, v2, tmp); 00192 spe_shufb(p, v3, v3, v3, tmp); 00193 break; 00194 case 4: 00195 scale_signed = 1.0f / 2147483647.0f; 00196 scale_unsigned = 1.0f / 4294967295.0f; 00197 break; 00198 default: 00199 assert(0); 00200 break; 00201 } 00202 00203 switch (type) { 00204 case PIPE_FORMAT_TYPE_FLOAT: 00205 break; 00206 case PIPE_FORMAT_TYPE_UNORM: 00207 spe_ilhu(p, tmp, ((unsigned) scale_unsigned) >> 16); 00208 spe_iohl(p, tmp, ((unsigned) scale_unsigned) & 0x0ffff); 00209 spe_cuflt(p, v0, v0, 0); 00210 spe_fm(p, v0, v0, tmp); 00211 break; 00212 case PIPE_FORMAT_TYPE_SNORM: 00213 spe_ilhu(p, tmp, ((unsigned) scale_signed) >> 
16); 00214 spe_iohl(p, tmp, ((unsigned) scale_signed) & 0x0ffff); 00215 spe_csflt(p, v0, v0, 0); 00216 spe_fm(p, v0, v0, tmp); 00217 break; 00218 case PIPE_FORMAT_TYPE_USCALED: 00219 spe_cuflt(p, v0, v0, 0); 00220 break; 00221 case PIPE_FORMAT_TYPE_SSCALED: 00222 spe_csflt(p, v0, v0, 0); 00223 break; 00224 } 00225 00226 00227 if (count < 4) { 00228 float_one = spe_allocate_available_register(p); 00229 spe_il(p, float_one, 1); 00230 spe_cuflt(p, float_one, float_one, 0); 00231 00232 if (count < 3) { 00233 float_zero = spe_allocate_available_register(p); 00234 spe_il(p, float_zero, 0); 00235 } 00236 } 00237 00238 spe_release_register(p, tmp); 00239 00240 emit_matrix_transpose(p, v0, v1, v2, v3, out_ptr, shuf_ptr, count); 00241 00242 switch (count) { 00243 case 1: 00244 spe_stqd(p, float_zero, out_ptr, 1); 00245 case 2: 00246 spe_stqd(p, float_zero, out_ptr, 2); 00247 case 3: 00248 spe_stqd(p, float_one, out_ptr, 3); 00249 } 00250 00251 if (float_zero != -1) { 00252 spe_release_register(p, float_zero); 00253 } 00254 00255 if (float_one != -1) { 00256 spe_release_register(p, float_one); 00257 } 00258 }
static void emit_matrix_transpose | ( | struct spe_function * | p, | |
unsigned | row0, | |||
unsigned | row1, | |||
unsigned | row2, | |||
unsigned | row3, | |||
unsigned | dest_ptr, | |||
unsigned | shuf_ptr, | |||
unsigned | count | |||
) | [static] |
Emit a 4x4 matrix transpose operation.
p | Function that the transpose operation is to be appended to | |
row0 | Register containing row 0 of the source matrix | |
row1 | Register containing row 1 of the source matrix | |
row2 | Register containing row 2 of the source matrix | |
row3 | Register containing row 3 of the source matrix | |
dest_ptr | Register containing the address of the destination matrix | |
shuf_ptr | Register containing the address of the shuffled data | |
count | Number of columns to actually be written to the destination |
row0
, row1
, row2
, and row3
are scratch and can be modified by the generated code. Furthermore, these registers will be released, via calls to spe_release_register
, by this function. This function requires that four temporary registers are available on entry.
Definition at line 59 of file cell_vertex_fetch.c.
References spe_allocate_available_register(), spe_lqd(), spe_release_register(), spe_shufb(), and spe_stqd().
/**
 * Append code for a 4x4 matrix transpose to an SPE function.
 *
 * The transpose is built from two rounds of shufb instructions using the
 * two shuffle patterns stored at quadwords 3 and 4 of \c shuf_ptr.  Only the
 * first \c count result columns are computed in full and stored.
 *
 * \param p         Function that the transpose operation is appended to
 * \param row0      Register containing row 0 of the source matrix
 * \param row1      Register containing row 1 of the source matrix
 * \param row2      Register containing row 2 of the source matrix
 * \param row3      Register containing row 3 of the source matrix
 * \param dest_ptr  Register containing the address of the destination matrix
 * \param shuf_ptr  Register containing the address of the shuffle patterns
 * \param count     Number of columns to actually write to the destination
 *
 * \note
 * The four row registers are overwritten by the generated code and are
 * released by this function.  Four further registers must be available on
 * entry; they are allocated and released here.
 */
static void
emit_matrix_transpose(struct spe_function *p,
                      unsigned row0, unsigned row1, unsigned row2,
                      unsigned row3, unsigned dest_ptr,
                      unsigned shuf_ptr, unsigned count)
{
   /* Freshly allocated registers: the two shuffle patterns and the two
    * intermediates produced from rows 0 and 2.
    */
   const int pattern_hi = spe_allocate_available_register(p);
   const int pattern_lo = spe_allocate_available_register(p);
   const int mix02_hi = spe_allocate_available_register(p);
   const int mix02_lo = spe_allocate_available_register(p);

   /* The intermediates produced from rows 1 and 3 recycle the registers of
    * rows 0 and 2, which are dead by the time they are written.
    */
   const int mix13_hi = row0;
   const int mix13_lo = row2;

   /* Result columns.  Columns 0 and 1 recycle the registers of rows 1 and
    * 3; columns 2 and 3 recycle the two "hi" intermediates, which are only
    * overwritten after their last use as a source.
    */
   int column[4];
   int slot;

   column[0] = row1;
   column[1] = row3;
   column[2] = mix02_hi;
   column[3] = mix13_hi;

   spe_lqd(p, pattern_hi, shuf_ptr, 3);
   spe_lqd(p, pattern_lo, shuf_ptr, 4);

   /* First round: combine rows pairwise. */
   spe_shufb(p, mix02_hi, row0, row2, pattern_hi);
   spe_shufb(p, mix02_lo, row0, row2, pattern_lo);
   spe_shufb(p, mix13_hi, row1, row3, pattern_hi);
   spe_shufb(p, mix13_lo, row1, row3, pattern_lo);

   /* Second round: form the columns.  Column 0 is always generated; the
    * others only when they will be stored.
    */
   spe_shufb(p, column[0], mix02_hi, mix13_hi, pattern_hi);
   if (count >= 2) {
      spe_shufb(p, column[1], mix02_hi, mix13_hi, pattern_lo);
   }
   if (count >= 3) {
      spe_shufb(p, column[2], mix02_lo, mix13_lo, pattern_hi);
   }
   if (count >= 4) {
      spe_shufb(p, column[3], mix02_lo, mix13_lo, pattern_lo);
   }

   /* Store the requested columns, highest first.  stqd is encoded with a
    * quadword offset (stand-alone assemblers do the byte-offset to
    * quadword-offset conversion for you), so the column index is the
    * offset.  Nothing is stored for a count outside 1..4.
    */
   if (count <= 4) {
      for (slot = (int) count - 1; slot >= 0; slot--) {
         spe_stqd(p, column[slot], dest_ptr, slot);
      }
   }

   /* Hand back every register used: the four columns (which cover rows 0,
    * 1, 3 and one allocated intermediate), both patterns, the remaining
    * allocated intermediate and the register of row 2.
    */
   spe_release_register(p, column[0]);
   spe_release_register(p, column[1]);
   spe_release_register(p, column[2]);
   spe_release_register(p, column[3]);
   spe_release_register(p, pattern_hi);
   spe_release_register(p, pattern_lo);
   spe_release_register(p, mix02_lo);
   spe_release_register(p, mix13_lo);
}