00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00053 #include <transpose_matrix4x4.h>
00054 #include <simdmath/ceilf4.h>
00055 #include <simdmath/cosf4.h>
00056 #include <simdmath/divf4.h>
00057 #include <simdmath/floorf4.h>
00058 #include <simdmath/log2f4.h>
00059 #include <simdmath/powf4.h>
00060 #include <simdmath/sinf4.h>
00061 #include <simdmath/sqrtf4.h>
00062 #include <simdmath/truncf4.h>
00063
00064 #include "pipe/p_compiler.h"
00065 #include "pipe/p_state.h"
00066 #include "pipe/p_shader_tokens.h"
00067 #include "tgsi/tgsi_parse.h"
00068 #include "tgsi/tgsi_util.h"
00069 #include "spu_exec.h"
00070 #include "spu_main.h"
00071 #include "spu_vertex_shader.h"
00072 #include "spu_dcache.h"
00073 #include "cell/common.h"
00074
/*
 * Word positions of the four pixels of a quad inside one qword.
 * These names are used to build the shufb control tables that feed
 * micro_ddx() and micro_ddy().
 */
#define TILE_TOP_LEFT 0
#define TILE_TOP_RIGHT 1
#define TILE_BOTTOM_LEFT 2
#define TILE_BOTTOM_RIGHT 3
00079
00080
00081
00082
/*
 * Shorthand for the TGSI_EXEC_TEMP_* names.
 *
 * In this file the *_I form is used as an index into mach->Temps[] and the
 * matching *_C form as an index into that register's .xyzw[] channel array,
 * e.g. mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].
 */
#define TEMP_0_I TGSI_EXEC_TEMP_00000000_I
#define TEMP_0_C TGSI_EXEC_TEMP_00000000_C
#define TEMP_7F_I TGSI_EXEC_TEMP_7FFFFFFF_I
#define TEMP_7F_C TGSI_EXEC_TEMP_7FFFFFFF_C
#define TEMP_80_I TGSI_EXEC_TEMP_80000000_I
#define TEMP_80_C TGSI_EXEC_TEMP_80000000_C
#define TEMP_FF_I TGSI_EXEC_TEMP_FFFFFFFF_I
#define TEMP_FF_C TGSI_EXEC_TEMP_FFFFFFFF_C
#define TEMP_1_I TGSI_EXEC_TEMP_ONE_I
#define TEMP_1_C TGSI_EXEC_TEMP_ONE_C
#define TEMP_2_I TGSI_EXEC_TEMP_TWO_I
#define TEMP_2_C TGSI_EXEC_TEMP_TWO_C
#define TEMP_128_I TGSI_EXEC_TEMP_128_I
#define TEMP_128_C TGSI_EXEC_TEMP_128_C
#define TEMP_M128_I TGSI_EXEC_TEMP_MINUS_128_I
#define TEMP_M128_C TGSI_EXEC_TEMP_MINUS_128_C
#define TEMP_KILMASK_I TGSI_EXEC_TEMP_KILMASK_I
#define TEMP_KILMASK_C TGSI_EXEC_TEMP_KILMASK_C
#define TEMP_OUTPUT_I TGSI_EXEC_TEMP_OUTPUT_I
#define TEMP_OUTPUT_C TGSI_EXEC_TEMP_OUTPUT_C
#define TEMP_PRIMITIVE_I TGSI_EXEC_TEMP_PRIMITIVE_I
#define TEMP_PRIMITIVE_C TGSI_EXEC_TEMP_PRIMITIVE_C
#define TEMP_R0 TGSI_EXEC_TEMP_R0
00106
/** Loop header: step CHAN through the four channel indices 0..3. */
#define FOR_EACH_CHANNEL(CHAN)\
   for (CHAN = 0; CHAN < 4; CHAN++)

/** Non-zero when bit CHAN is set in the write mask of INST's destination 0. */
#define IS_CHANNEL_ENABLED(INST, CHAN)\
   ((INST).FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN)))

/** Same test as IS_CHANNEL_ENABLED, but against destination register 1. */
#define IS_CHANNEL_ENABLED2(INST, CHAN)\
   ((INST).FullDstRegisters[1].DstRegister.WriteMask & (1 << (CHAN)))

/**
 * Loop header that runs the following statement only for channels enabled
 * in destination 0's write mask.  Expands to a `for` followed by a bare
 * `if`, so the statement that follows should be a braced block.
 */
#define FOR_EACH_ENABLED_CHANNEL(INST, CHAN)\
   FOR_EACH_CHANNEL( CHAN )\
      if (IS_CHANNEL_ENABLED( INST, CHAN ))

/** As FOR_EACH_ENABLED_CHANNEL, but filtered by destination 1's write mask. */
#define FOR_EACH_ENABLED_CHANNEL2(INST, CHAN)\
   FOR_EACH_CHANNEL( CHAN )\
      if (IS_CHANNEL_ENABLED2( INST, CHAN ))
00123
00124
/**
 * Recompute MACH->ExecMask as the bitwise AND of the condition, loop,
 * continue and function masks.
 *
 * MACH is parenthesized so that any pointer expression (not just a plain
 * identifier) can be passed; note that it is still evaluated several times.
 */
#define UPDATE_EXEC_MASK(MACH) \
   ((MACH)->ExecMask = (MACH)->CondMask & (MACH)->LoopMask & \
                       (MACH)->ContMask & (MACH)->FuncMask)
00128
00129
/* Channel indices used with the .xyzw[] arrays and the write-mask bits. */
#define CHAN_X 0
#define CHAN_Y 1
#define CHAN_Z 2
#define CHAN_W 3
00134
00135
00136
00142 void
00143 spu_exec_machine_init(struct spu_exec_machine *mach,
00144 uint numSamplers,
00145 struct spu_sampler *samplers,
00146 unsigned processor)
00147 {
00148 const qword zero = si_il(0);
00149 const qword not_zero = si_il(~0);
00150
00151 (void) numSamplers;
00152 mach->Samplers = samplers;
00153 mach->Processor = processor;
00154 mach->Addrs = &mach->Temps[TGSI_EXEC_NUM_TEMPS];
00155
00156
00157 mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].q = zero;
00158 mach->Temps[TEMP_FF_I].xyzw[TEMP_FF_C].q = not_zero;
00159 mach->Temps[TEMP_7F_I].xyzw[TEMP_7F_C].q = si_shli(not_zero, -1);
00160 mach->Temps[TEMP_80_I].xyzw[TEMP_80_C].q = si_shli(not_zero, 31);
00161
00162 mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].q = (qword) spu_splats(1.0f);
00163 mach->Temps[TEMP_2_I].xyzw[TEMP_2_C].q = (qword) spu_splats(2.0f);
00164 mach->Temps[TEMP_128_I].xyzw[TEMP_128_C].q = (qword) spu_splats(128.0f);
00165 mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C].q = (qword) spu_splats(-128.0f);
00166 }
00167
00168
00169 static INLINE qword
00170 micro_abs(qword src)
00171 {
00172 return si_rotmi(si_shli(src, 1), -1);
00173 }
00174
00175 static INLINE qword
00176 micro_ceil(qword src)
00177 {
00178 return (qword) _ceilf4((vec_float4) src);
00179 }
00180
00181 static INLINE qword
00182 micro_cos(qword src)
00183 {
00184 return (qword) _cosf4((vec_float4) src);
00185 }
00186
00187 static const qword br_shuf = {
00188 TILE_BOTTOM_RIGHT + 0, TILE_BOTTOM_RIGHT + 1,
00189 TILE_BOTTOM_RIGHT + 2, TILE_BOTTOM_RIGHT + 3,
00190 TILE_BOTTOM_RIGHT + 0, TILE_BOTTOM_RIGHT + 1,
00191 TILE_BOTTOM_RIGHT + 2, TILE_BOTTOM_RIGHT + 3,
00192 TILE_BOTTOM_RIGHT + 0, TILE_BOTTOM_RIGHT + 1,
00193 TILE_BOTTOM_RIGHT + 2, TILE_BOTTOM_RIGHT + 3,
00194 TILE_BOTTOM_RIGHT + 0, TILE_BOTTOM_RIGHT + 1,
00195 TILE_BOTTOM_RIGHT + 2, TILE_BOTTOM_RIGHT + 3,
00196 };
00197
00198 static const qword bl_shuf = {
00199 TILE_BOTTOM_LEFT + 0, TILE_BOTTOM_LEFT + 1,
00200 TILE_BOTTOM_LEFT + 2, TILE_BOTTOM_LEFT + 3,
00201 TILE_BOTTOM_LEFT + 0, TILE_BOTTOM_LEFT + 1,
00202 TILE_BOTTOM_LEFT + 2, TILE_BOTTOM_LEFT + 3,
00203 TILE_BOTTOM_LEFT + 0, TILE_BOTTOM_LEFT + 1,
00204 TILE_BOTTOM_LEFT + 2, TILE_BOTTOM_LEFT + 3,
00205 TILE_BOTTOM_LEFT + 0, TILE_BOTTOM_LEFT + 1,
00206 TILE_BOTTOM_LEFT + 2, TILE_BOTTOM_LEFT + 3,
00207 };
00208
00209 static const qword tl_shuf = {
00210 TILE_TOP_LEFT + 0, TILE_TOP_LEFT + 1,
00211 TILE_TOP_LEFT + 2, TILE_TOP_LEFT + 3,
00212 TILE_TOP_LEFT + 0, TILE_TOP_LEFT + 1,
00213 TILE_TOP_LEFT + 2, TILE_TOP_LEFT + 3,
00214 TILE_TOP_LEFT + 0, TILE_TOP_LEFT + 1,
00215 TILE_TOP_LEFT + 2, TILE_TOP_LEFT + 3,
00216 TILE_TOP_LEFT + 0, TILE_TOP_LEFT + 1,
00217 TILE_TOP_LEFT + 2, TILE_TOP_LEFT + 3,
00218 };
00219
00220 static qword
00221 micro_ddx(qword src)
00222 {
00223 qword bottom_right = si_shufb(src, src, br_shuf);
00224 qword bottom_left = si_shufb(src, src, bl_shuf);
00225
00226 return si_fs(bottom_right, bottom_left);
00227 }
00228
00229 static qword
00230 micro_ddy(qword src)
00231 {
00232 qword top_left = si_shufb(src, src, tl_shuf);
00233 qword bottom_left = si_shufb(src, src, bl_shuf);
00234
00235 return si_fs(top_left, bottom_left);
00236 }
00237
00238 static INLINE qword
00239 micro_div(qword src0, qword src1)
00240 {
00241 return (qword) _divf4((vec_float4) src0, (vec_float4) src1);
00242 }
00243
00244 static qword
00245 micro_flr(qword src)
00246 {
00247 return (qword) _floorf4((vec_float4) src);
00248 }
00249
00250 static qword
00251 micro_frc(qword src)
00252 {
00253 return si_fs(src, (qword) _floorf4((vec_float4) src));
00254 }
00255
00256 static INLINE qword
00257 micro_ge(qword src0, qword src1)
00258 {
00259 return si_or(si_fceq(src0, src1), si_fcgt(src0, src1));
00260 }
00261
00262 static qword
00263 micro_lg2(qword src)
00264 {
00265 return (qword) _log2f4((vec_float4) src);
00266 }
00267
00268 static INLINE qword
00269 micro_lt(qword src0, qword src1)
00270 {
00271 const qword tmp = si_or(si_fceq(src0, src1), si_fcgt(src0, src1));
00272
00273 return si_xori(tmp, 0xff);
00274 }
00275
00276 static INLINE qword
00277 micro_max(qword src0, qword src1)
00278 {
00279 return si_selb(src1, src0, si_fcgt(src0, src1));
00280 }
00281
00282 static INLINE qword
00283 micro_min(qword src0, qword src1)
00284 {
00285 return si_selb(src0, src1, si_fcgt(src0, src1));
00286 }
00287
00288 static qword
00289 micro_neg(qword src)
00290 {
00291 return si_xor(src, (qword) spu_splats(0x80000000));
00292 }
00293
00294 static qword
00295 micro_set_sign(qword src)
00296 {
00297 return si_or(src, (qword) spu_splats(0x80000000));
00298 }
00299
00300 static qword
00301 micro_pow(qword src0, qword src1)
00302 {
00303 return (qword) _powf4((vec_float4) src0, (vec_float4) src1);
00304 }
00305
00306 static qword
00307 micro_rnd(qword src)
00308 {
00309 const qword half = (qword) spu_splats(0.5f);
00310
00311
00312
00313 return (qword) _floorf4((vec_float4) si_fa(src, half));
00314 }
00315
00316 static INLINE qword
00317 micro_ishr(qword src0, qword src1)
00318 {
00319 return si_rotma(src0, si_sfi(src1, 0));
00320 }
00321
00322 static qword
00323 micro_trunc(qword src)
00324 {
00325 return (qword) _truncf4((vec_float4) src);
00326 }
00327
00328 static qword
00329 micro_sin(qword src)
00330 {
00331 return (qword) _sinf4((vec_float4) src);
00332 }
00333
00334 static INLINE qword
00335 micro_sqrt(qword src)
00336 {
00337 return (qword) _sqrtf4((vec_float4) src);
00338 }
00339
00340 static void
00341 fetch_src_file_channel(
00342 const struct spu_exec_machine *mach,
00343 const uint file,
00344 const uint swizzle,
00345 const union spu_exec_channel *index,
00346 union spu_exec_channel *chan )
00347 {
00348 switch( swizzle ) {
00349 case TGSI_EXTSWIZZLE_X:
00350 case TGSI_EXTSWIZZLE_Y:
00351 case TGSI_EXTSWIZZLE_Z:
00352 case TGSI_EXTSWIZZLE_W:
00353 switch( file ) {
00354 case TGSI_FILE_CONSTANT: {
00355 unsigned i;
00356
00357 for (i = 0; i < 4; i++) {
00358 const float *ptr = mach->Consts[index->i[i]];
00359 float tmp[4];
00360
00361 spu_dcache_fetch_unaligned((qword *) tmp,
00362 (uintptr_t)(ptr + swizzle),
00363 sizeof(float));
00364
00365 chan->f[i] = tmp[0];
00366 }
00367 break;
00368 }
00369
00370 case TGSI_FILE_INPUT:
00371 chan->u[0] = mach->Inputs[index->i[0]].xyzw[swizzle].u[0];
00372 chan->u[1] = mach->Inputs[index->i[1]].xyzw[swizzle].u[1];
00373 chan->u[2] = mach->Inputs[index->i[2]].xyzw[swizzle].u[2];
00374 chan->u[3] = mach->Inputs[index->i[3]].xyzw[swizzle].u[3];
00375 break;
00376
00377 case TGSI_FILE_TEMPORARY:
00378 chan->u[0] = mach->Temps[index->i[0]].xyzw[swizzle].u[0];
00379 chan->u[1] = mach->Temps[index->i[1]].xyzw[swizzle].u[1];
00380 chan->u[2] = mach->Temps[index->i[2]].xyzw[swizzle].u[2];
00381 chan->u[3] = mach->Temps[index->i[3]].xyzw[swizzle].u[3];
00382 break;
00383
00384 case TGSI_FILE_IMMEDIATE:
00385 ASSERT( index->i[0] < (int) mach->ImmLimit );
00386 ASSERT( index->i[1] < (int) mach->ImmLimit );
00387 ASSERT( index->i[2] < (int) mach->ImmLimit );
00388 ASSERT( index->i[3] < (int) mach->ImmLimit );
00389
00390 chan->f[0] = mach->Imms[index->i[0]][swizzle];
00391 chan->f[1] = mach->Imms[index->i[1]][swizzle];
00392 chan->f[2] = mach->Imms[index->i[2]][swizzle];
00393 chan->f[3] = mach->Imms[index->i[3]][swizzle];
00394 break;
00395
00396 case TGSI_FILE_ADDRESS:
00397 chan->u[0] = mach->Addrs[index->i[0]].xyzw[swizzle].u[0];
00398 chan->u[1] = mach->Addrs[index->i[1]].xyzw[swizzle].u[1];
00399 chan->u[2] = mach->Addrs[index->i[2]].xyzw[swizzle].u[2];
00400 chan->u[3] = mach->Addrs[index->i[3]].xyzw[swizzle].u[3];
00401 break;
00402
00403 case TGSI_FILE_OUTPUT:
00404
00405 chan->u[0] = mach->Outputs[index->i[0]].xyzw[swizzle].u[0];
00406 chan->u[1] = mach->Outputs[index->i[1]].xyzw[swizzle].u[1];
00407 chan->u[2] = mach->Outputs[index->i[2]].xyzw[swizzle].u[2];
00408 chan->u[3] = mach->Outputs[index->i[3]].xyzw[swizzle].u[3];
00409 break;
00410
00411 default:
00412 ASSERT( 0 );
00413 }
00414 break;
00415
00416 case TGSI_EXTSWIZZLE_ZERO:
00417 *chan = mach->Temps[TEMP_0_I].xyzw[TEMP_0_C];
00418 break;
00419
00420 case TGSI_EXTSWIZZLE_ONE:
00421 *chan = mach->Temps[TEMP_1_I].xyzw[TEMP_1_C];
00422 break;
00423
00424 default:
00425 ASSERT( 0 );
00426 }
00427 }
00428
00429 static void
00430 fetch_source(
00431 const struct spu_exec_machine *mach,
00432 union spu_exec_channel *chan,
00433 const struct tgsi_full_src_register *reg,
00434 const uint chan_index )
00435 {
00436 union spu_exec_channel index;
00437 uint swizzle;
00438
00439 index.i[0] =
00440 index.i[1] =
00441 index.i[2] =
00442 index.i[3] = reg->SrcRegister.Index;
00443
00444 if (reg->SrcRegister.Indirect) {
00445 union spu_exec_channel index2;
00446 union spu_exec_channel indir_index;
00447
00448 index2.i[0] =
00449 index2.i[1] =
00450 index2.i[2] =
00451 index2.i[3] = reg->SrcRegisterInd.Index;
00452
00453 swizzle = tgsi_util_get_src_register_swizzle(®->SrcRegisterInd,
00454 CHAN_X);
00455 fetch_src_file_channel(
00456 mach,
00457 reg->SrcRegisterInd.File,
00458 swizzle,
00459 &index2,
00460 &indir_index );
00461
00462 index.q = si_a(index.q, indir_index.q);
00463 }
00464
00465 if( reg->SrcRegister.Dimension ) {
00466 switch( reg->SrcRegister.File ) {
00467 case TGSI_FILE_INPUT:
00468 index.q = si_mpyi(index.q, 17);
00469 break;
00470 case TGSI_FILE_CONSTANT:
00471 index.q = si_shli(index.q, 12);
00472 break;
00473 default:
00474 ASSERT( 0 );
00475 }
00476
00477 index.i[0] += reg->SrcRegisterDim.Index;
00478 index.i[1] += reg->SrcRegisterDim.Index;
00479 index.i[2] += reg->SrcRegisterDim.Index;
00480 index.i[3] += reg->SrcRegisterDim.Index;
00481
00482 if (reg->SrcRegisterDim.Indirect) {
00483 union spu_exec_channel index2;
00484 union spu_exec_channel indir_index;
00485
00486 index2.i[0] =
00487 index2.i[1] =
00488 index2.i[2] =
00489 index2.i[3] = reg->SrcRegisterDimInd.Index;
00490
00491 swizzle = tgsi_util_get_src_register_swizzle( ®->SrcRegisterDimInd, CHAN_X );
00492 fetch_src_file_channel(
00493 mach,
00494 reg->SrcRegisterDimInd.File,
00495 swizzle,
00496 &index2,
00497 &indir_index );
00498
00499 index.q = si_a(index.q, indir_index.q);
00500 }
00501 }
00502
00503 swizzle = tgsi_util_get_full_src_register_extswizzle( reg, chan_index );
00504 fetch_src_file_channel(
00505 mach,
00506 reg->SrcRegister.File,
00507 swizzle,
00508 &index,
00509 chan );
00510
00511 switch (tgsi_util_get_full_src_register_sign_mode( reg, chan_index )) {
00512 case TGSI_UTIL_SIGN_CLEAR:
00513 chan->q = micro_abs(chan->q);
00514 break;
00515
00516 case TGSI_UTIL_SIGN_SET:
00517 chan->q = micro_set_sign(chan->q);
00518 break;
00519
00520 case TGSI_UTIL_SIGN_TOGGLE:
00521 chan->q = micro_neg(chan->q);
00522 break;
00523
00524 case TGSI_UTIL_SIGN_KEEP:
00525 break;
00526 }
00527
00528 if (reg->SrcRegisterExtMod.Complement) {
00529 chan->q = si_fs(mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].q, chan->q);
00530 }
00531 }
00532
00533 static void
00534 store_dest(
00535 struct spu_exec_machine *mach,
00536 const union spu_exec_channel *chan,
00537 const struct tgsi_full_dst_register *reg,
00538 const struct tgsi_full_instruction *inst,
00539 uint chan_index )
00540 {
00541 union spu_exec_channel *dst;
00542
00543 switch( reg->DstRegister.File ) {
00544 case TGSI_FILE_NULL:
00545 return;
00546
00547 case TGSI_FILE_OUTPUT:
00548 dst = &mach->Outputs[mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0]
00549 + reg->DstRegister.Index].xyzw[chan_index];
00550 break;
00551
00552 case TGSI_FILE_TEMPORARY:
00553 dst = &mach->Temps[reg->DstRegister.Index].xyzw[chan_index];
00554 break;
00555
00556 case TGSI_FILE_ADDRESS:
00557 dst = &mach->Addrs[reg->DstRegister.Index].xyzw[chan_index];
00558 break;
00559
00560 default:
00561 ASSERT( 0 );
00562 return;
00563 }
00564
00565 switch (inst->Instruction.Saturate)
00566 {
00567 case TGSI_SAT_NONE:
00568 if (mach->ExecMask & 0x1)
00569 dst->i[0] = chan->i[0];
00570 if (mach->ExecMask & 0x2)
00571 dst->i[1] = chan->i[1];
00572 if (mach->ExecMask & 0x4)
00573 dst->i[2] = chan->i[2];
00574 if (mach->ExecMask & 0x8)
00575 dst->i[3] = chan->i[3];
00576 break;
00577
00578 case TGSI_SAT_ZERO_ONE:
00579
00580 dst->q = micro_max(chan->q, mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].q);
00581 dst->q = micro_min(dst->q, mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].q);
00582 break;
00583
00584 case TGSI_SAT_MINUS_PLUS_ONE:
00585 ASSERT( 0 );
00586 break;
00587
00588 default:
00589 ASSERT( 0 );
00590 }
00591 }
00592
/*
 * Convenience wrappers around fetch_source() and store_dest().
 * Both expect variables named `mach` and `inst` to be in scope at the
 * point of use.
 */

/** Fetch channel CHAN of source register INDEX of `inst` into VAL. */
#define FETCH(VAL,INDEX,CHAN)\
   fetch_source (mach, VAL, &inst->FullSrcRegisters[INDEX], CHAN)

/** Store VAL into channel CHAN of destination register INDEX of `inst`. */
#define STORE(VAL,INDEX,CHAN)\
   store_dest (mach, VAL, &inst->FullDstRegisters[INDEX], inst, CHAN )
00598
00599
00604 static void
00605 exec_kil(struct spu_exec_machine *mach,
00606 const struct tgsi_full_instruction *inst)
00607 {
00608 uint uniquemask;
00609 uint chan_index;
00610 uint kilmask = 0;
00611 union spu_exec_channel r[1];
00612
00613
00614
00615
00616 uniquemask = (1 << TGSI_EXTSWIZZLE_ZERO) | (1 << TGSI_EXTSWIZZLE_ONE);
00617
00618 for (chan_index = 0; chan_index < 4; chan_index++)
00619 {
00620 uint swizzle;
00621 uint i;
00622
00623
00624 swizzle = tgsi_util_get_full_src_register_extswizzle (
00625 &inst->FullSrcRegisters[0],
00626 chan_index);
00627
00628
00629 if (uniquemask & (1 << swizzle))
00630 continue;
00631 uniquemask |= 1 << swizzle;
00632
00633 FETCH(&r[0], 0, chan_index);
00634 for (i = 0; i < 4; i++)
00635 if (r[0].f[i] < 0.0f)
00636 kilmask |= 1 << i;
00637 }
00638
00639 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask;
00640 }
00641
00646 static void
00647 exec_kilp(struct tgsi_exec_machine *mach,
00648 const struct tgsi_full_instruction *inst)
00649 {
00650 uint kilmask = 0;
00651
00652
00653
00654 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask;
00655 }
00656
00657
00658
00659
00660 static void
00661 fetch_texel( struct spu_sampler *sampler,
00662 const union spu_exec_channel *s,
00663 const union spu_exec_channel *t,
00664 const union spu_exec_channel *p,
00665 float lodbias,
00666 union spu_exec_channel *r,
00667 union spu_exec_channel *g,
00668 union spu_exec_channel *b,
00669 union spu_exec_channel *a )
00670 {
00671 qword rgba[4];
00672 qword out[4];
00673
00674 sampler->get_samples(sampler, s->f, t->f, p->f, lodbias,
00675 (float (*)[4]) rgba);
00676
00677 _transpose_matrix4x4((vec_float4 *) out, (vec_float4 *) rgba);
00678 r->q = out[0];
00679 g->q = out[1];
00680 b->q = out[2];
00681 a->q = out[3];
00682 }
00683
00684
00685 static void
00686 exec_tex(struct spu_exec_machine *mach,
00687 const struct tgsi_full_instruction *inst,
00688 boolean biasLod, boolean projected)
00689 {
00690 const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index;
00691 union spu_exec_channel r[8];
00692 uint chan_index;
00693 float lodBias;
00694
00695
00696
00697 switch (inst->InstructionExtTexture.Texture) {
00698 case TGSI_TEXTURE_1D:
00699
00700 FETCH(&r[0], 0, CHAN_X);
00701
00702 if (projected) {
00703 FETCH(&r[1], 0, CHAN_W);
00704 r[0].q = micro_div(r[0].q, r[1].q);
00705 }
00706
00707 if (biasLod) {
00708 FETCH(&r[1], 0, CHAN_W);
00709 lodBias = r[2].f[0];
00710 }
00711 else
00712 lodBias = 0.0;
00713
00714 fetch_texel(&mach->Samplers[unit],
00715 &r[0], NULL, NULL, lodBias,
00716 &r[0], &r[1], &r[2], &r[3]);
00717 break;
00718
00719 case TGSI_TEXTURE_2D:
00720 case TGSI_TEXTURE_RECT:
00721
00722 FETCH(&r[0], 0, CHAN_X);
00723 FETCH(&r[1], 0, CHAN_Y);
00724 FETCH(&r[2], 0, CHAN_Z);
00725
00726 if (projected) {
00727 FETCH(&r[3], 0, CHAN_W);
00728 r[0].q = micro_div(r[0].q, r[3].q);
00729 r[1].q = micro_div(r[1].q, r[3].q);
00730 r[2].q = micro_div(r[2].q, r[3].q);
00731 }
00732
00733 if (biasLod) {
00734 FETCH(&r[3], 0, CHAN_W);
00735 lodBias = r[3].f[0];
00736 }
00737 else
00738 lodBias = 0.0;
00739
00740 fetch_texel(&mach->Samplers[unit],
00741 &r[0], &r[1], &r[2], lodBias,
00742 &r[0], &r[1], &r[2], &r[3]);
00743 break;
00744
00745 case TGSI_TEXTURE_3D:
00746 case TGSI_TEXTURE_CUBE:
00747
00748 FETCH(&r[0], 0, CHAN_X);
00749 FETCH(&r[1], 0, CHAN_Y);
00750 FETCH(&r[2], 0, CHAN_Z);
00751
00752 if (projected) {
00753 FETCH(&r[3], 0, CHAN_W);
00754 r[0].q = micro_div(r[0].q, r[3].q);
00755 r[1].q = micro_div(r[1].q, r[3].q);
00756 r[2].q = micro_div(r[2].q, r[3].q);
00757 }
00758
00759 if (biasLod) {
00760 FETCH(&r[3], 0, CHAN_W);
00761 lodBias = r[3].f[0];
00762 }
00763 else
00764 lodBias = 0.0;
00765
00766 fetch_texel(&mach->Samplers[unit],
00767 &r[0], &r[1], &r[2], lodBias,
00768 &r[0], &r[1], &r[2], &r[3]);
00769 break;
00770
00771 default:
00772 ASSERT (0);
00773 }
00774
00775 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
00776 STORE( &r[chan_index], 0, chan_index );
00777 }
00778 }
00779
00780
00781
00782 static void
00783 constant_interpolation(
00784 struct spu_exec_machine *mach,
00785 unsigned attrib,
00786 unsigned chan )
00787 {
00788 unsigned i;
00789
00790 for( i = 0; i < QUAD_SIZE; i++ ) {
00791 mach->Inputs[attrib].xyzw[chan].f[i] = mach->InterpCoefs[attrib].a0[chan];
00792 }
00793 }
00794
00795 static void
00796 linear_interpolation(
00797 struct spu_exec_machine *mach,
00798 unsigned attrib,
00799 unsigned chan )
00800 {
00801 const float x = mach->QuadPos.xyzw[0].f[0];
00802 const float y = mach->QuadPos.xyzw[1].f[0];
00803 const float dadx = mach->InterpCoefs[attrib].dadx[chan];
00804 const float dady = mach->InterpCoefs[attrib].dady[chan];
00805 const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y;
00806 mach->Inputs[attrib].xyzw[chan].f[0] = a0;
00807 mach->Inputs[attrib].xyzw[chan].f[1] = a0 + dadx;
00808 mach->Inputs[attrib].xyzw[chan].f[2] = a0 + dady;
00809 mach->Inputs[attrib].xyzw[chan].f[3] = a0 + dadx + dady;
00810 }
00811
00812 static void
00813 perspective_interpolation(
00814 struct spu_exec_machine *mach,
00815 unsigned attrib,
00816 unsigned chan )
00817 {
00818 const float x = mach->QuadPos.xyzw[0].f[0];
00819 const float y = mach->QuadPos.xyzw[1].f[0];
00820 const float dadx = mach->InterpCoefs[attrib].dadx[chan];
00821 const float dady = mach->InterpCoefs[attrib].dady[chan];
00822 const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y;
00823 const float *w = mach->QuadPos.xyzw[3].f;
00824
00825 mach->Inputs[attrib].xyzw[chan].f[0] = a0 / w[0];
00826 mach->Inputs[attrib].xyzw[chan].f[1] = (a0 + dadx) / w[1];
00827 mach->Inputs[attrib].xyzw[chan].f[2] = (a0 + dady) / w[2];
00828 mach->Inputs[attrib].xyzw[chan].f[3] = (a0 + dadx + dady) / w[3];
00829 }
00830
00831
/**
 * Signature shared by constant_interpolation(), linear_interpolation() and
 * perspective_interpolation(): fill channel `chan` of input attribute
 * `attrib` in `mach`.
 */
typedef void (* interpolation_func)(
   struct spu_exec_machine *mach,
   unsigned attrib,
   unsigned chan );
00836
00837 static void
00838 exec_declaration(struct spu_exec_machine *mach,
00839 const struct tgsi_full_declaration *decl)
00840 {
00841 if( mach->Processor == TGSI_PROCESSOR_FRAGMENT ) {
00842 if( decl->Declaration.File == TGSI_FILE_INPUT ) {
00843 unsigned first, last, mask;
00844 interpolation_func interp;
00845
00846 first = decl->DeclarationRange.First;
00847 last = decl->DeclarationRange.Last;
00848 mask = decl->Declaration.UsageMask;
00849
00850 switch( decl->Declaration.Interpolate ) {
00851 case TGSI_INTERPOLATE_CONSTANT:
00852 interp = constant_interpolation;
00853 break;
00854
00855 case TGSI_INTERPOLATE_LINEAR:
00856 interp = linear_interpolation;
00857 break;
00858
00859 case TGSI_INTERPOLATE_PERSPECTIVE:
00860 interp = perspective_interpolation;
00861 break;
00862
00863 default:
00864 ASSERT( 0 );
00865 }
00866
00867 if( mask == TGSI_WRITEMASK_XYZW ) {
00868 unsigned i, j;
00869
00870 for( i = first; i <= last; i++ ) {
00871 for( j = 0; j < NUM_CHANNELS; j++ ) {
00872 interp( mach, i, j );
00873 }
00874 }
00875 }
00876 else {
00877 unsigned i, j;
00878
00879 for( j = 0; j < NUM_CHANNELS; j++ ) {
00880 if( mask & (1 << j) ) {
00881 for( i = first; i <= last; i++ ) {
00882 interp( mach, i, j );
00883 }
00884 }
00885 }
00886 }
00887 }
00888 }
00889 }
00890
00891 static void
00892 exec_instruction(
00893 struct spu_exec_machine *mach,
00894 const struct tgsi_full_instruction *inst,
00895 int *pc )
00896 {
00897 uint chan_index;
00898 union spu_exec_channel r[8];
00899
00900 (*pc)++;
00901
00902 switch (inst->Instruction.Opcode) {
00903 case TGSI_OPCODE_ARL:
00904 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
00905 FETCH( &r[0], 0, chan_index );
00906 r[0].q = si_cflts(r[0].q, 0);
00907 STORE( &r[0], 0, chan_index );
00908 }
00909 break;
00910
00911 case TGSI_OPCODE_MOV:
00912 case TGSI_OPCODE_SWZ:
00913 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
00914 FETCH( &r[0], 0, chan_index );
00915 STORE( &r[0], 0, chan_index );
00916 }
00917 break;
00918
00919 case TGSI_OPCODE_LIT:
00920 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
00921 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X );
00922 }
00923
00924 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y ) || IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
00925 FETCH( &r[0], 0, CHAN_X );
00926 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
00927 r[0].q = micro_max(r[0].q, mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].q);
00928 STORE( &r[0], 0, CHAN_Y );
00929 }
00930
00931 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
00932 FETCH( &r[1], 0, CHAN_Y );
00933 r[1].q = micro_max(r[1].q, mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].q);
00934
00935 FETCH( &r[2], 0, CHAN_W );
00936 r[2].q = micro_min(r[2].q, mach->Temps[TEMP_128_I].xyzw[TEMP_128_C].q);
00937 r[2].q = micro_max(r[2].q, mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C].q);
00938 r[1].q = micro_pow(r[1].q, r[2].q);
00939
00940
00941
00942 r[0].q = si_fcgt(r[0].q, mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].q);
00943 r[0].q = si_selb(mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].q, r[1].q,
00944 r[0].q);
00945 STORE( &r[0], 0, CHAN_Z );
00946 }
00947 }
00948
00949 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
00950 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
00951 }
00952 break;
00953
00954 case TGSI_OPCODE_RCP:
00955
00956 FETCH( &r[0], 0, CHAN_X );
00957 r[0].q = micro_div(mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].q, r[0].q);
00958 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
00959 STORE( &r[0], 0, chan_index );
00960 }
00961 break;
00962
00963 case TGSI_OPCODE_RSQ:
00964
00965 FETCH( &r[0], 0, CHAN_X );
00966 r[0].q = micro_sqrt(r[0].q);
00967 r[0].q = micro_div(mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].q, r[0].q);
00968 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
00969 STORE( &r[0], 0, chan_index );
00970 }
00971 break;
00972
00973 case TGSI_OPCODE_EXP:
00974 ASSERT (0);
00975 break;
00976
00977 case TGSI_OPCODE_LOG:
00978 ASSERT (0);
00979 break;
00980
00981 case TGSI_OPCODE_MUL:
00982 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index )
00983 {
00984 FETCH(&r[0], 0, chan_index);
00985 FETCH(&r[1], 1, chan_index);
00986
00987 r[0].q = si_fm(r[0].q, r[1].q);
00988
00989 STORE(&r[0], 0, chan_index);
00990 }
00991 break;
00992
00993 case TGSI_OPCODE_ADD:
00994 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
00995 FETCH( &r[0], 0, chan_index );
00996 FETCH( &r[1], 1, chan_index );
00997 r[0].q = si_fa(r[0].q, r[1].q);
00998 STORE( &r[0], 0, chan_index );
00999 }
01000 break;
01001
01002 case TGSI_OPCODE_DP3:
01003
01004 FETCH( &r[0], 0, CHAN_X );
01005 FETCH( &r[1], 1, CHAN_X );
01006 r[0].q = si_fm(r[0].q, r[1].q);
01007
01008 FETCH( &r[1], 0, CHAN_Y );
01009 FETCH( &r[2], 1, CHAN_Y );
01010 r[0].q = si_fma(r[1].q, r[2].q, r[0].q);
01011
01012
01013 FETCH( &r[1], 0, CHAN_Z );
01014 FETCH( &r[2], 1, CHAN_Z );
01015 r[0].q = si_fma(r[1].q, r[2].q, r[0].q);
01016
01017 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
01018 STORE( &r[0], 0, chan_index );
01019 }
01020 break;
01021
01022 case TGSI_OPCODE_DP4:
01023
01024 FETCH(&r[0], 0, CHAN_X);
01025 FETCH(&r[1], 1, CHAN_X);
01026
01027 r[0].q = si_fm(r[0].q, r[1].q);
01028
01029 FETCH(&r[1], 0, CHAN_Y);
01030 FETCH(&r[2], 1, CHAN_Y);
01031
01032 r[0].q = si_fma(r[1].q, r[2].q, r[0].q);
01033
01034 FETCH(&r[1], 0, CHAN_Z);
01035 FETCH(&r[2], 1, CHAN_Z);
01036
01037 r[0].q = si_fma(r[1].q, r[2].q, r[0].q);
01038
01039 FETCH(&r[1], 0, CHAN_W);
01040 FETCH(&r[2], 1, CHAN_W);
01041
01042 r[0].q = si_fma(r[1].q, r[2].q, r[0].q);
01043
01044 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
01045 STORE( &r[0], 0, chan_index );
01046 }
01047 break;
01048
01049 case TGSI_OPCODE_DST:
01050 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
01051 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X );
01052 }
01053
01054 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
01055 FETCH( &r[0], 0, CHAN_Y );
01056 FETCH( &r[1], 1, CHAN_Y);
01057 r[0].q = si_fm(r[0].q, r[1].q);
01058 STORE( &r[0], 0, CHAN_Y );
01059 }
01060
01061 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
01062 FETCH( &r[0], 0, CHAN_Z );
01063 STORE( &r[0], 0, CHAN_Z );
01064 }
01065
01066 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
01067 FETCH( &r[0], 1, CHAN_W );
01068 STORE( &r[0], 0, CHAN_W );
01069 }
01070 break;
01071
01072 case TGSI_OPCODE_MIN:
01073 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
01074 FETCH(&r[0], 0, chan_index);
01075 FETCH(&r[1], 1, chan_index);
01076
01077 r[0].q = micro_min(r[0].q, r[1].q);
01078
01079 STORE(&r[0], 0, chan_index);
01080 }
01081 break;
01082
01083 case TGSI_OPCODE_MAX:
01084 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
01085 FETCH(&r[0], 0, chan_index);
01086 FETCH(&r[1], 1, chan_index);
01087
01088 r[0].q = micro_max(r[0].q, r[1].q);
01089
01090 STORE(&r[0], 0, chan_index );
01091 }
01092 break;
01093
01094 case TGSI_OPCODE_SLT:
01095
01096 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
01097 FETCH( &r[0], 0, chan_index );
01098 FETCH( &r[1], 1, chan_index );
01099
01100 r[0].q = micro_ge(r[0].q, r[1].q);
01101 r[0].q = si_xori(r[0].q, 0xff);
01102
01103 STORE( &r[0], 0, chan_index );
01104 }
01105 break;
01106
01107 case TGSI_OPCODE_SGE:
01108
01109 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
01110 FETCH( &r[0], 0, chan_index );
01111 FETCH( &r[1], 1, chan_index );
01112 r[0].q = micro_ge(r[0].q, r[1].q);
01113 STORE( &r[0], 0, chan_index );
01114 }
01115 break;
01116
01117 case TGSI_OPCODE_MAD:
01118
01119 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
01120 FETCH( &r[0], 0, chan_index );
01121 FETCH( &r[1], 1, chan_index );
01122 FETCH( &r[2], 2, chan_index );
01123 r[0].q = si_fma(r[0].q, r[1].q, r[2].q);
01124 STORE( &r[0], 0, chan_index );
01125 }
01126 break;
01127
01128 case TGSI_OPCODE_SUB:
01129 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
01130 FETCH(&r[0], 0, chan_index);
01131 FETCH(&r[1], 1, chan_index);
01132
01133 r[0].q = si_fs(r[0].q, r[1].q);
01134
01135 STORE(&r[0], 0, chan_index);
01136 }
01137 break;
01138
01139 case TGSI_OPCODE_LERP:
01140
01141 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
01142 FETCH(&r[0], 0, chan_index);
01143 FETCH(&r[1], 1, chan_index);
01144 FETCH(&r[2], 2, chan_index);
01145
01146 r[1].q = si_fs(r[1].q, r[2].q);
01147 r[0].q = si_fma(r[0].q, r[1].q, r[2].q);
01148
01149 STORE(&r[0], 0, chan_index);
01150 }
01151 break;
01152
01153 case TGSI_OPCODE_CND:
01154 ASSERT (0);
01155 break;
01156
01157 case TGSI_OPCODE_CND0:
01158 ASSERT (0);
01159 break;
01160
01161 case TGSI_OPCODE_DOT2ADD:
01162
01163 ASSERT (0);
01164 break;
01165
01166 case TGSI_OPCODE_INDEX:
01167 ASSERT (0);
01168 break;
01169
01170 case TGSI_OPCODE_NEGATE:
01171 ASSERT (0);
01172 break;
01173
01174 case TGSI_OPCODE_FRAC:
01175
01176 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
01177 FETCH( &r[0], 0, chan_index );
01178 r[0].q = micro_frc(r[0].q);
01179 STORE( &r[0], 0, chan_index );
01180 }
01181 break;
01182
01183 case TGSI_OPCODE_CLAMP:
01184 ASSERT (0);
01185 break;
01186
01187 case TGSI_OPCODE_FLOOR:
01188
01189 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
01190 FETCH( &r[0], 0, chan_index );
01191 r[0].q = micro_flr(r[0].q);
01192 STORE( &r[0], 0, chan_index );
01193 }
01194 break;
01195
01196 case TGSI_OPCODE_ROUND:
01197 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
01198 FETCH( &r[0], 0, chan_index );
01199 r[0].q = micro_rnd(r[0].q);
01200 STORE( &r[0], 0, chan_index );
01201 }
01202 break;
01203
01204 case TGSI_OPCODE_EXPBASE2:
01205
01206 FETCH(&r[0], 0, CHAN_X);
01207
01208 r[0].q = micro_pow(mach->Temps[TEMP_2_I].xyzw[TEMP_2_C].q, r[0].q);
01209
01210 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
01211 STORE( &r[0], 0, chan_index );
01212 }
01213 break;
01214
01215 case TGSI_OPCODE_LOGBASE2:
01216
01217 FETCH( &r[0], 0, CHAN_X );
01218 r[0].q = micro_lg2(r[0].q);
01219 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
01220 STORE( &r[0], 0, chan_index );
01221 }
01222 break;
01223
01224 case TGSI_OPCODE_POWER:
01225
01226 FETCH(&r[0], 0, CHAN_X);
01227 FETCH(&r[1], 1, CHAN_X);
01228
01229 r[0].q = micro_pow(r[0].q, r[1].q);
01230
01231 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
01232 STORE( &r[0], 0, chan_index );
01233 }
01234 break;
01235
01236 case TGSI_OPCODE_CROSSPRODUCT:
01237
01238 FETCH(&r[0], 0, CHAN_Y);
01239 FETCH(&r[1], 1, CHAN_Z);
01240 FETCH(&r[3], 0, CHAN_Z);
01241 FETCH(&r[4], 1, CHAN_Y);
01242
01243
01244
01245 r[2].q = si_fm(r[3].q, r[5].q);
01246 r[2].q = si_fms(r[0].q, r[1].q, r[2].q);
01247
01248 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
01249 STORE( &r[2], 0, CHAN_X );
01250 }
01251
01252 FETCH(&r[2], 1, CHAN_X);
01253 FETCH(&r[5], 0, CHAN_X);
01254
01255
01256
01257 r[1].q = si_fm(r[1].q, r[5].q);
01258 r[3].q = si_fms(r[3].q, r[2].q, r[1].q);
01259
01260 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
01261 STORE( &r[3], 0, CHAN_Y );
01262 }
01263
01264
01265
01266 r[0].q = si_fm(r[0].q, r[2].q);
01267 r[5].q = si_fms(r[5].q, r[4].q, r[0].q);
01268
01269 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
01270 STORE( &r[5], 0, CHAN_Z );
01271 }
01272
01273 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
01274 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
01275 }
01276 break;
01277
01278 case TGSI_OPCODE_MULTIPLYMATRIX:
01279 ASSERT (0);
01280 break;
01281
01282 case TGSI_OPCODE_ABS:
01283 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
01284 FETCH(&r[0], 0, chan_index);
01285
01286 r[0].q = micro_abs(r[0].q);
01287
01288 STORE(&r[0], 0, chan_index);
01289 }
01290 break;
01291
01292 case TGSI_OPCODE_RCC:
01293 ASSERT (0);
01294 break;
01295
01296 case TGSI_OPCODE_DPH:
01297 FETCH(&r[0], 0, CHAN_X);
01298 FETCH(&r[1], 1, CHAN_X);
01299
01300 r[0].q = si_fm(r[0].q, r[1].q);
01301
01302 FETCH(&r[1], 0, CHAN_Y);
01303 FETCH(&r[2], 1, CHAN_Y);
01304
01305 r[0].q = si_fma(r[1].q, r[2].q, r[0].q);
01306
01307 FETCH(&r[1], 0, CHAN_Z);
01308 FETCH(&r[2], 1, CHAN_Z);
01309
01310 r[0].q = si_fma(r[1].q, r[2].q, r[0].q);
01311
01312 FETCH(&r[1], 1, CHAN_W);
01313
01314 r[0].q = si_fa(r[0].q, r[1].q);
01315
01316 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
01317 STORE( &r[0], 0, chan_index );
01318 }
01319 break;
01320
01321 case TGSI_OPCODE_COS:
01322 FETCH(&r[0], 0, CHAN_X);
01323
01324 r[0].q = micro_cos(r[0].q);
01325
01326 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
01327 STORE( &r[0], 0, chan_index );
01328 }
01329 break;
01330
01331 case TGSI_OPCODE_DDX:
01332 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
01333 FETCH( &r[0], 0, chan_index );
01334 r[0].q = micro_ddx(r[0].q);
01335 STORE( &r[0], 0, chan_index );
01336 }
01337 break;
01338
01339 case TGSI_OPCODE_DDY:
01340 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
01341 FETCH( &r[0], 0, chan_index );
01342 r[0].q = micro_ddy(r[0].q);
01343 STORE( &r[0], 0, chan_index );
01344 }
01345 break;
01346
01347 case TGSI_OPCODE_KILP:
01348 exec_kilp (mach, inst);
01349 break;
01350
01351 case TGSI_OPCODE_KIL:
01352 exec_kil (mach, inst);
01353 break;
01354
01355 case TGSI_OPCODE_PK2H:
01356 ASSERT (0);
01357 break;
01358
01359 case TGSI_OPCODE_PK2US:
01360 ASSERT (0);
01361 break;
01362
01363 case TGSI_OPCODE_PK4B:
01364 ASSERT (0);
01365 break;
01366
01367 case TGSI_OPCODE_PK4UB:
01368 ASSERT (0);
01369 break;
01370
01371 case TGSI_OPCODE_RFL:
01372 ASSERT (0);
01373 break;
01374
01375 case TGSI_OPCODE_SEQ:
01376 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
01377 FETCH( &r[0], 0, chan_index );
01378 FETCH( &r[1], 1, chan_index );
01379
01380 r[0].q = si_fceq(r[0].q, r[1].q);
01381
01382 STORE( &r[0], 0, chan_index );
01383 }
01384 break;
01385
01386 case TGSI_OPCODE_SFL:
01387 ASSERT (0);
01388 break;
01389
01390 case TGSI_OPCODE_SGT:
01391 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
01392 FETCH( &r[0], 0, chan_index );
01393 FETCH( &r[1], 1, chan_index );
01394 r[0].q = si_fcgt(r[0].q, r[1].q);
01395 STORE( &r[0], 0, chan_index );
01396 }
01397 break;
01398
01399 case TGSI_OPCODE_SIN:
01400 FETCH( &r[0], 0, CHAN_X );
01401 r[0].q = micro_sin(r[0].q);
01402 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
01403 STORE( &r[0], 0, chan_index );
01404 }
01405 break;
01406
01407 case TGSI_OPCODE_SLE:
01408 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
01409 FETCH( &r[0], 0, chan_index );
01410 FETCH( &r[1], 1, chan_index );
01411
01412 r[0].q = si_fcgt(r[0].q, r[1].q);
01413 r[0].q = si_xori(r[0].q, 0xff);
01414
01415 STORE( &r[0], 0, chan_index );
01416 }
01417 break;
01418
01419 case TGSI_OPCODE_SNE:
01420 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
01421 FETCH( &r[0], 0, chan_index );
01422 FETCH( &r[1], 1, chan_index );
01423
01424 r[0].q = si_fceq(r[0].q, r[1].q);
01425 r[0].q = si_xori(r[0].q, 0xff);
01426
01427 STORE( &r[0], 0, chan_index );
01428 }
01429 break;
01430
01431 case TGSI_OPCODE_STR:
01432 ASSERT (0);
01433 break;
01434
01435 case TGSI_OPCODE_TEX:
01436
01437
01438
01439 exec_tex(mach, inst, FALSE, FALSE);
01440 break;
01441
01442 case TGSI_OPCODE_TXB:
01443
01444
01445
01446 exec_tex(mach, inst, TRUE, FALSE);
01447 break;
01448
01449 case TGSI_OPCODE_TXD:
01450
01451
01452
01453
01454
01455 ASSERT (0);
01456 break;
01457
01458 case TGSI_OPCODE_TXL:
01459
01460
01461
01462 exec_tex(mach, inst, TRUE, FALSE);
01463 break;
01464
01465 case TGSI_OPCODE_TXP:
01466
01467
01468
01469 exec_tex(mach, inst, TRUE, TRUE);
01470 break;
01471
01472 case TGSI_OPCODE_UP2H:
01473 ASSERT (0);
01474 break;
01475
01476 case TGSI_OPCODE_UP2US:
01477 ASSERT (0);
01478 break;
01479
01480 case TGSI_OPCODE_UP4B:
01481 ASSERT (0);
01482 break;
01483
01484 case TGSI_OPCODE_UP4UB:
01485 ASSERT (0);
01486 break;
01487
01488 case TGSI_OPCODE_X2D:
01489 ASSERT (0);
01490 break;
01491
01492 case TGSI_OPCODE_ARA:
01493 ASSERT (0);
01494 break;
01495
01496 case TGSI_OPCODE_ARR:
01497 ASSERT (0);
01498 break;
01499
01500 case TGSI_OPCODE_BRA:
01501 ASSERT (0);
01502 break;
01503
01504 case TGSI_OPCODE_CAL:
01505
01506 if (mach->ExecMask) {
01507
01508
01509
01510 ASSERT(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
01511 mach->CondStack[mach->CondStackTop++] = mach->CondMask;
01512 ASSERT(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
01513 mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
01514 ASSERT(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
01515 mach->ContStack[mach->ContStackTop++] = mach->ContMask;
01516
01517 ASSERT(mach->FuncStackTop < TGSI_EXEC_MAX_CALL_NESTING);
01518 mach->FuncStack[mach->FuncStackTop++] = mach->FuncMask;
01519
01520
01521 mach->CallStack[mach->CallStackTop++] = *pc;
01522 *pc = inst->InstructionExtLabel.Label;
01523 }
01524 break;
01525
01526 case TGSI_OPCODE_RET:
01527 mach->FuncMask &= ~mach->ExecMask;
01528 UPDATE_EXEC_MASK(mach);
01529
01530 if (mach->ExecMask == 0x0) {
01531
01532
01533 if (mach->CallStackTop == 0) {
01534
01535 *pc = -1;
01536 return;
01537 }
01538 *pc = mach->CallStack[--mach->CallStackTop];
01539
01540
01541 ASSERT(mach->CondStackTop > 0);
01542 mach->CondMask = mach->CondStack[--mach->CondStackTop];
01543 ASSERT(mach->LoopStackTop > 0);
01544 mach->LoopMask = mach->LoopStack[--mach->LoopStackTop];
01545 ASSERT(mach->ContStackTop > 0);
01546 mach->ContMask = mach->ContStack[--mach->ContStackTop];
01547 ASSERT(mach->FuncStackTop > 0);
01548 mach->FuncMask = mach->FuncStack[--mach->FuncStackTop];
01549
01550 UPDATE_EXEC_MASK(mach);
01551 }
01552 break;
01553
01554 case TGSI_OPCODE_SSG:
01555 ASSERT (0);
01556 break;
01557
01558 case TGSI_OPCODE_CMP:
01559 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
01560 FETCH(&r[0], 0, chan_index);
01561 FETCH(&r[1], 1, chan_index);
01562 FETCH(&r[2], 2, chan_index);
01563
01564
01565
01566 r[3].q = si_xor(r[3].q, r[3].q);
01567 r[0].q = micro_lt(r[0].q, r[3].q);
01568 r[0].q = si_selb(r[1].q, r[2].q, r[0].q);
01569
01570 STORE(&r[0], 0, chan_index);
01571 }
01572 break;
01573
01574 case TGSI_OPCODE_SCS:
01575 if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) || IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) {
01576 FETCH( &r[0], 0, CHAN_X );
01577 }
01578 if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) ) {
01579 r[1].q = micro_cos(r[0].q);
01580 STORE( &r[1], 0, CHAN_X );
01581 }
01582 if( IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) {
01583 r[1].q = micro_sin(r[0].q);
01584 STORE( &r[1], 0, CHAN_Y );
01585 }
01586 if( IS_CHANNEL_ENABLED( *inst, CHAN_Z ) ) {
01587 STORE( &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], 0, CHAN_Z );
01588 }
01589 if( IS_CHANNEL_ENABLED( *inst, CHAN_W ) ) {
01590 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
01591 }
01592 break;
01593
01594 case TGSI_OPCODE_NRM:
01595 ASSERT (0);
01596 break;
01597
01598 case TGSI_OPCODE_DIV:
01599 ASSERT( 0 );
01600 break;
01601
01602 case TGSI_OPCODE_DP2:
01603 FETCH( &r[0], 0, CHAN_X );
01604 FETCH( &r[1], 1, CHAN_X );
01605 r[0].q = si_fm(r[0].q, r[1].q);
01606
01607 FETCH( &r[1], 0, CHAN_Y );
01608 FETCH( &r[2], 1, CHAN_Y );
01609 r[0].q = si_fma(r[1].q, r[2].q, r[0].q);
01610
01611 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
01612 STORE( &r[0], 0, chan_index );
01613 }
01614 break;
01615
01616 case TGSI_OPCODE_IF:
01617
01618 ASSERT(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
01619 mach->CondStack[mach->CondStackTop++] = mach->CondMask;
01620 FETCH( &r[0], 0, CHAN_X );
01621
01622 if( ! r[0].u[0] ) {
01623 mach->CondMask &= ~0x1;
01624 }
01625 if( ! r[0].u[1] ) {
01626 mach->CondMask &= ~0x2;
01627 }
01628 if( ! r[0].u[2] ) {
01629 mach->CondMask &= ~0x4;
01630 }
01631 if( ! r[0].u[3] ) {
01632 mach->CondMask &= ~0x8;
01633 }
01634 UPDATE_EXEC_MASK(mach);
01635
01636 break;
01637
01638 case TGSI_OPCODE_ELSE:
01639
01640 {
01641 uint prevMask;
01642 ASSERT(mach->CondStackTop > 0);
01643 prevMask = mach->CondStack[mach->CondStackTop - 1];
01644 mach->CondMask = ~mach->CondMask & prevMask;
01645 UPDATE_EXEC_MASK(mach);
01646
01647 }
01648 break;
01649
01650 case TGSI_OPCODE_ENDIF:
01651
01652 ASSERT(mach->CondStackTop > 0);
01653 mach->CondMask = mach->CondStack[--mach->CondStackTop];
01654 UPDATE_EXEC_MASK(mach);
01655 break;
01656
01657 case TGSI_OPCODE_END:
01658
01659 *pc = -1;
01660 break;
01661
01662 case TGSI_OPCODE_REP:
01663 ASSERT (0);
01664 break;
01665
01666 case TGSI_OPCODE_ENDREP:
01667 ASSERT (0);
01668 break;
01669
01670 case TGSI_OPCODE_PUSHA:
01671 ASSERT (0);
01672 break;
01673
01674 case TGSI_OPCODE_POPA:
01675 ASSERT (0);
01676 break;
01677
01678 case TGSI_OPCODE_CEIL:
01679 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
01680 FETCH( &r[0], 0, chan_index );
01681 r[0].q = micro_ceil(r[0].q);
01682 STORE( &r[0], 0, chan_index );
01683 }
01684 break;
01685
01686 case TGSI_OPCODE_I2F:
01687 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
01688 FETCH( &r[0], 0, chan_index );
01689 r[0].q = si_csflt(r[0].q, 0);
01690 STORE( &r[0], 0, chan_index );
01691 }
01692 break;
01693
01694 case TGSI_OPCODE_NOT:
01695 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
01696 FETCH( &r[0], 0, chan_index );
01697 r[0].q = si_xorbi(r[0].q, 0xff);
01698 STORE( &r[0], 0, chan_index );
01699 }
01700 break;
01701
01702 case TGSI_OPCODE_TRUNC:
01703 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
01704 FETCH( &r[0], 0, chan_index );
01705 r[0].q = micro_trunc(r[0].q);
01706 STORE( &r[0], 0, chan_index );
01707 }
01708 break;
01709
01710 case TGSI_OPCODE_SHL:
01711 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
01712 FETCH( &r[0], 0, chan_index );
01713 FETCH( &r[1], 1, chan_index );
01714
01715 r[0].q = si_shl(r[0].q, r[1].q);
01716
01717 STORE( &r[0], 0, chan_index );
01718 }
01719 break;
01720
01721 case TGSI_OPCODE_SHR:
01722 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
01723 FETCH( &r[0], 0, chan_index );
01724 FETCH( &r[1], 1, chan_index );
01725 r[0].q = micro_ishr(r[0].q, r[1].q);
01726 STORE( &r[0], 0, chan_index );
01727 }
01728 break;
01729
01730 case TGSI_OPCODE_AND:
01731 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
01732 FETCH( &r[0], 0, chan_index );
01733 FETCH( &r[1], 1, chan_index );
01734 r[0].q = si_and(r[0].q, r[1].q);
01735 STORE( &r[0], 0, chan_index );
01736 }
01737 break;
01738
01739 case TGSI_OPCODE_OR:
01740 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
01741 FETCH( &r[0], 0, chan_index );
01742 FETCH( &r[1], 1, chan_index );
01743 r[0].q = si_or(r[0].q, r[1].q);
01744 STORE( &r[0], 0, chan_index );
01745 }
01746 break;
01747
01748 case TGSI_OPCODE_MOD:
01749 ASSERT (0);
01750 break;
01751
01752 case TGSI_OPCODE_XOR:
01753 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
01754 FETCH( &r[0], 0, chan_index );
01755 FETCH( &r[1], 1, chan_index );
01756 r[0].q = si_xor(r[0].q, r[1].q);
01757 STORE( &r[0], 0, chan_index );
01758 }
01759 break;
01760
01761 case TGSI_OPCODE_SAD:
01762 ASSERT (0);
01763 break;
01764
01765 case TGSI_OPCODE_TXF:
01766 ASSERT (0);
01767 break;
01768
01769 case TGSI_OPCODE_TXQ:
01770 ASSERT (0);
01771 break;
01772
01773 case TGSI_OPCODE_EMIT:
01774 mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] += 16;
01775 mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]]++;
01776 break;
01777
01778 case TGSI_OPCODE_ENDPRIM:
01779 mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]++;
01780 mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]] = 0;
01781 break;
01782
01783 case TGSI_OPCODE_LOOP:
01784
01785 case TGSI_OPCODE_BGNLOOP2:
01786
01787 ASSERT(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
01788 mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
01789 ASSERT(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
01790 mach->ContStack[mach->ContStackTop++] = mach->ContMask;
01791 break;
01792
01793 case TGSI_OPCODE_ENDLOOP:
01794
01795 case TGSI_OPCODE_ENDLOOP2:
01796
01797 ASSERT(mach->ContStackTop > 0);
01798 mach->ContMask = mach->ContStack[mach->ContStackTop - 1];
01799 if (mach->LoopMask) {
01800
01801 *pc = inst->InstructionExtLabel.Label + 1;
01802 }
01803 else {
01804
01805 ASSERT(mach->LoopStackTop > 0);
01806 mach->LoopMask = mach->LoopStack[--mach->LoopStackTop];
01807
01808 ASSERT(mach->ContStackTop > 0);
01809 mach->ContMask = mach->ContStack[--mach->ContStackTop];
01810 }
01811 UPDATE_EXEC_MASK(mach);
01812 break;
01813
01814 case TGSI_OPCODE_BRK:
01815
01816 mach->LoopMask &= ~mach->ExecMask;
01817
01818 UPDATE_EXEC_MASK(mach);
01819 break;
01820
01821 case TGSI_OPCODE_CONT:
01822
01823 mach->ContMask &= ~mach->ExecMask;
01824
01825 UPDATE_EXEC_MASK(mach);
01826 break;
01827
01828 case TGSI_OPCODE_BGNSUB:
01829
01830 break;
01831
01832 case TGSI_OPCODE_ENDSUB:
01833
01834 break;
01835
01836 case TGSI_OPCODE_NOISE1:
01837 ASSERT( 0 );
01838 break;
01839
01840 case TGSI_OPCODE_NOISE2:
01841 ASSERT( 0 );
01842 break;
01843
01844 case TGSI_OPCODE_NOISE3:
01845 ASSERT( 0 );
01846 break;
01847
01848 case TGSI_OPCODE_NOISE4:
01849 ASSERT( 0 );
01850 break;
01851
01852 case TGSI_OPCODE_NOP:
01853 break;
01854
01855 default:
01856 ASSERT( 0 );
01857 }
01858 }
01859
01860
01865 uint
01866 spu_exec_machine_run( struct spu_exec_machine *mach )
01867 {
01868 uint i;
01869 int pc = 0;
01870
01871 mach->CondMask = 0xf;
01872 mach->LoopMask = 0xf;
01873 mach->ContMask = 0xf;
01874 mach->FuncMask = 0xf;
01875 mach->ExecMask = 0xf;
01876
01877 mach->CondStackTop = 0;
01878 ASSERT(mach->CondStackTop == 0);
01879 ASSERT(mach->LoopStackTop == 0);
01880 ASSERT(mach->ContStackTop == 0);
01881 ASSERT(mach->CallStackTop == 0);
01882
01883 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] = 0;
01884 mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] = 0;
01885
01886 if( mach->Processor == TGSI_PROCESSOR_GEOMETRY ) {
01887 mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0] = 0;
01888 mach->Primitives[0] = 0;
01889 }
01890
01891
01892
01893 if( mach->Processor == TGSI_PROCESSOR_FRAGMENT ) {
01894 for (i = 0; i < mach->NumDeclarations; i++) {
01895 union {
01896 struct tgsi_full_declaration decl;
01897 qword buffer[ROUNDUP16(sizeof(struct tgsi_full_declaration)) / 16];
01898 } d ALIGN16_ATTRIB;
01899 unsigned ea = (unsigned) (mach->Declarations + pc);
01900
01901 spu_dcache_fetch_unaligned(d.buffer, ea, sizeof(d.decl));
01902
01903 exec_declaration( mach, &d.decl );
01904 }
01905 }
01906
01907
01908 while (pc != -1) {
01909 union {
01910 struct tgsi_full_instruction inst;
01911 qword buffer[ROUNDUP16(sizeof(struct tgsi_full_instruction)) / 16];
01912 } i ALIGN16_ATTRIB;
01913 unsigned ea = (unsigned) (mach->Instructions + pc);
01914
01915 spu_dcache_fetch_unaligned(i.buffer, ea, sizeof(i.inst));
01916 exec_instruction( mach, & i.inst, &pc );
01917 }
01918
01919 #if 0
01920
01921 if (mach->Processor == TGSI_PROCESSOR_FRAGMENT) {
01922
01923
01924
01925 for (i = 0; i < 4; i++)
01926 mach->Outputs[0].xyzw[2].f[i] *= ctx->DrawBuffer->_DepthMaxF;
01927 }
01928 #endif
01929
01930 return ~mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0];
01931 }
01932
01933