00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025 #include <inttypes.h>
00026 #include "pipe/p_defines.h"
00027 #include "pipe/p_context.h"
00028 #include "pipe/p_format.h"
00029
00030 #include "../auxiliary/draw/draw_context.h"
00031 #include "../auxiliary/draw/draw_private.h"
00032
00033 #include "cell_context.h"
00034 #include "rtasm/rtasm_ppc_spe.h"
00035
00036
/**
 * Append the instruction sequence for a 4x4 matrix transpose to \c p and
 * store up to four of the resulting columns.
 *
 * \param p         SPE function the instructions are appended to
 * \param row0      Register holding row 0 of the source matrix
 * \param row1      Register holding row 1 of the source matrix
 * \param row2      Register holding row 2 of the source matrix
 * \param row3      Register holding row 3 of the source matrix
 * \param dest_ptr  Register used as the base address of the column stores
 * \param shuf_ptr  Register used as the base address of the two shuffle
 *                  patterns, which are loaded from offsets 3 and 4
 * \param count     Number of columns to store (1 to 4); any other value
 *                  results in no store being emitted
 *
 * \note
 * The four row registers are overwritten by the generated code and are
 * released by this function, together with the four temporaries that it
 * allocates itself.
 */
static void
emit_matrix_transpose(struct spe_function *p,
                      unsigned row0, unsigned row1, unsigned row2,
                      unsigned row3, unsigned dest_ptr,
                      unsigned shuf_ptr, unsigned count)
{
   const int pattern_hi = spe_allocate_available_register(p);
   const int pattern_lo = spe_allocate_available_register(p);
   const int pair_a = spe_allocate_available_register(p);
   const int pair_b = spe_allocate_available_register(p);
   int column[4];
   int to_release[8];
   int i;


   spe_lqd(p, pattern_hi, shuf_ptr, 3);
   spe_lqd(p, pattern_lo, shuf_ptr, 4);

   /* First pass: combine rows 0 and 2 into the two fresh temporaries. */
   spe_shufb(p, pair_a, row0, row2, pattern_hi);
   spe_shufb(p, pair_b, row0, row2, pattern_lo);

   /* Rows 0 and 2 are not read again, so their registers receive the
    * combination of rows 1 and 3.
    */
   spe_shufb(p, row0, row1, row3, pattern_hi);
   spe_shufb(p, row2, row1, row3, pattern_lo);

   /* Second pass.  Every column lands in a register whose previous contents
    * are no longer needed: rows 1 and 3 first, then the two intermediates
    * that fed columns 0 and 1.
    */
   column[0] = row1;
   column[1] = row3;
   column[2] = pair_a;
   column[3] = row0;

   spe_shufb(p, column[0], pair_a, row0, pattern_hi);
   if (count > 1) {
      spe_shufb(p, column[1], pair_a, row0, pattern_lo);
   }

   if (count > 2) {
      spe_shufb(p, column[2], pair_b, row2, pattern_hi);
   }

   if (count > 3) {
      spe_shufb(p, column[3], pair_b, row2, pattern_lo);
   }

   /* Store the requested columns, highest index first.  The store offset of
    * a column equals its index.
    */
   if (count >= 1 && count <= 4) {
      for (i = (int) count - 1; i >= 0; i--) {
         spe_stqd(p, column[i], dest_ptr, i);
      }
   }

   /* Hand back every register touched above: the four columns (which alias
    * rows 1, 3, the first intermediate and row 0), both shuffle patterns,
    * the second intermediate and row 2.
    */
   to_release[0] = column[0];
   to_release[1] = column[1];
   to_release[2] = column[2];
   to_release[3] = column[3];
   to_release[4] = pattern_hi;
   to_release[5] = pattern_lo;
   to_release[6] = pair_b;
   to_release[7] = row2;

   for (i = 0; i < 8; i++) {
      spe_release_register(p, to_release[i]);
   }
}
00146
00147
00148 static void
00149 emit_fetch(struct spe_function *p,
00150 unsigned in_ptr, unsigned *offset,
00151 unsigned out_ptr, unsigned shuf_ptr,
00152 enum pipe_format format)
00153 {
00154 const unsigned count = (pf_size_x(format) != 0) + (pf_size_y(format) != 0)
00155 + (pf_size_z(format) != 0) + (pf_size_w(format) != 0);
00156 const unsigned type = pf_type(format);
00157 const unsigned bytes = pf_size_x(format);
00158
00159 int v0 = spe_allocate_available_register(p);
00160 int v1 = spe_allocate_available_register(p);
00161 int v2 = spe_allocate_available_register(p);
00162 int v3 = spe_allocate_available_register(p);
00163 int tmp = spe_allocate_available_register(p);
00164 int float_zero = -1;
00165 int float_one = -1;
00166 float scale_signed = 0.0;
00167 float scale_unsigned = 0.0;
00168
00169 spe_lqd(p, v0, in_ptr, 0 + offset[0]);
00170 spe_lqd(p, v1, in_ptr, 1 + offset[0]);
00171 spe_lqd(p, v2, in_ptr, 2 + offset[0]);
00172 spe_lqd(p, v3, in_ptr, 3 + offset[0]);
00173 offset[0] += 4;
00174
00175 switch (bytes) {
00176 case 1:
00177 scale_signed = 1.0f / 127.0f;
00178 scale_unsigned = 1.0f / 255.0f;
00179 spe_lqd(p, tmp, shuf_ptr, 1);
00180 spe_shufb(p, v0, v0, v0, tmp);
00181 spe_shufb(p, v1, v1, v1, tmp);
00182 spe_shufb(p, v2, v2, v2, tmp);
00183 spe_shufb(p, v3, v3, v3, tmp);
00184 break;
00185 case 2:
00186 scale_signed = 1.0f / 32767.0f;
00187 scale_unsigned = 1.0f / 65535.0f;
00188 spe_lqd(p, tmp, shuf_ptr, 2);
00189 spe_shufb(p, v0, v0, v0, tmp);
00190 spe_shufb(p, v1, v1, v1, tmp);
00191 spe_shufb(p, v2, v2, v2, tmp);
00192 spe_shufb(p, v3, v3, v3, tmp);
00193 break;
00194 case 4:
00195 scale_signed = 1.0f / 2147483647.0f;
00196 scale_unsigned = 1.0f / 4294967295.0f;
00197 break;
00198 default:
00199 assert(0);
00200 break;
00201 }
00202
00203 switch (type) {
00204 case PIPE_FORMAT_TYPE_FLOAT:
00205 break;
00206 case PIPE_FORMAT_TYPE_UNORM:
00207 spe_ilhu(p, tmp, ((unsigned) scale_unsigned) >> 16);
00208 spe_iohl(p, tmp, ((unsigned) scale_unsigned) & 0x0ffff);
00209 spe_cuflt(p, v0, v0, 0);
00210 spe_fm(p, v0, v0, tmp);
00211 break;
00212 case PIPE_FORMAT_TYPE_SNORM:
00213 spe_ilhu(p, tmp, ((unsigned) scale_signed) >> 16);
00214 spe_iohl(p, tmp, ((unsigned) scale_signed) & 0x0ffff);
00215 spe_csflt(p, v0, v0, 0);
00216 spe_fm(p, v0, v0, tmp);
00217 break;
00218 case PIPE_FORMAT_TYPE_USCALED:
00219 spe_cuflt(p, v0, v0, 0);
00220 break;
00221 case PIPE_FORMAT_TYPE_SSCALED:
00222 spe_csflt(p, v0, v0, 0);
00223 break;
00224 }
00225
00226
00227 if (count < 4) {
00228 float_one = spe_allocate_available_register(p);
00229 spe_il(p, float_one, 1);
00230 spe_cuflt(p, float_one, float_one, 0);
00231
00232 if (count < 3) {
00233 float_zero = spe_allocate_available_register(p);
00234 spe_il(p, float_zero, 0);
00235 }
00236 }
00237
00238 spe_release_register(p, tmp);
00239
00240 emit_matrix_transpose(p, v0, v1, v2, v3, out_ptr, shuf_ptr, count);
00241
00242 switch (count) {
00243 case 1:
00244 spe_stqd(p, float_zero, out_ptr, 1);
00245 case 2:
00246 spe_stqd(p, float_zero, out_ptr, 2);
00247 case 3:
00248 spe_stqd(p, float_one, out_ptr, 3);
00249 }
00250
00251 if (float_zero != -1) {
00252 spe_release_register(p, float_zero);
00253 }
00254
00255 if (float_one != -1) {
00256 spe_release_register(p, float_one);
00257 }
00258 }
00259
00260
/**
 * Regenerate the SPE attribute-fetch code for the current vertex elements.
 *
 * The implementation is currently compiled out; calling this function
 * trips an assertion.  The disabled body generates one fetch routine per
 * distinct source format and records, for every attribute, the byte offset
 * of the routine it should use in \c cell->attrib_fetch_offsets.
 *
 * \param draw  Draw context whose vertex elements describe the attributes
 */
void cell_update_vertex_fetch(struct draw_context *draw)
{
#if 0
   struct cell_context *const cell =
      (struct cell_context *) draw->driver_private;
   struct spe_function *p = &cell->attrib_fetch;
   unsigned function_index[PIPE_MAX_ATTRIBS];
   unsigned unique_attr_formats;
   int out_ptr;
   int in_ptr;
   int shuf_ptr;
   unsigned i;
   unsigned j;


   /* For every attribute, find the first attribute with the same source
    * format.  function_index[i] == i marks the attributes that need a
    * routine of their own; the count of those sizes the code buffer.
    */
   unique_attr_formats = 1;
   function_index[0] = 0;
   for (i = 1; i < draw->vertex_fetch.nr_attrs; i++) {
      const enum pipe_format curr_fmt = draw->vertex_element[i].src_format;

      for (j = 0; j < i; j++) {
         if (curr_fmt == draw->vertex_element[j].src_format) {
            break;
         }
      }

      if (j == i) {
         unique_attr_formats++;
      }

      function_index[i] = j;
   }


   /* Reserve room for 34 instructions per distinct format. */
   spe_init_func(p, 34 * SPE_INST_SIZE * unique_attr_formats);


   /* Registers 3, 4 and 5 are requested explicitly -- presumably these
    * carry the routine's arguments; confirm against the SPE-side caller.
    */
   out_ptr = spe_allocate_register(p, 3);
   in_ptr = spe_allocate_register(p, 4);
   shuf_ptr = spe_allocate_register(p, 5);


   for (i = 0; i < draw->vertex_fetch.nr_attrs; i++) {
      unsigned offset;

      if (function_index[i] == i) {
         uintptr_t misalign;

         /* Byte offset of this routine from the start of the code buffer. */
         cell->attrib_fetch_offsets[i] =
            (unsigned) ((char *) p->csr - (char *) p->store);

         offset = 0;
         emit_fetch(p, in_ptr, &offset, out_ptr, shuf_ptr,
                    draw->vertex_element[i].src_format);
         spe_bi(p, 0, 0, 0);

         /* Round the write cursor up to the next 16-byte boundary so the
          * following routine starts aligned.  The cursor is advanced, not
          * the buffer base: moving p->store would invalidate every offset
          * recorded so far.  The padding follows the branch emitted above.
          * NOTE(review): assumes csr has the same uint32_t * type as store
          * -- confirm against the struct spe_function declaration.
          */
         misalign = ((uintptr_t) p->csr) & 0x0f;
         if (misalign != 0) {
            p->csr = (uint32_t *) ((char *) p->csr + (16 - misalign));
         }
      } else {
         /* Same format as an earlier attribute: share its routine. */
         cell->attrib_fetch_offsets[i] =
            cell->attrib_fetch_offsets[function_index[i]];
      }
   }
#else
   assert(0);
#endif
}