spu_exec.c

Go to the documentation of this file.
00001 /**************************************************************************
00002  * 
00003  * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
00004  * All Rights Reserved.
00005  * 
00006  * Permission is hereby granted, free of charge, to any person obtaining a
00007  * copy of this software and associated documentation files (the
00008  * "Software"), to deal in the Software without restriction, including
00009  * without limitation the rights to use, copy, modify, merge, publish,
00010  * distribute, sub license, and/or sell copies of the Software, and to
00011  * permit persons to whom the Software is furnished to do so, subject to
00012  * the following conditions:
00013  * 
00014  * The above copyright notice and this permission notice (including the
00015  * next paragraph) shall be included in all copies or substantial portions
00016  * of the Software.
00017  * 
00018  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
00019  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
00020  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
00021  * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
00022  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
00023  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
00024  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
00025  * 
00026  **************************************************************************/
00027 
00053 #include <transpose_matrix4x4.h>
00054 #include <simdmath/ceilf4.h>
00055 #include <simdmath/cosf4.h>
00056 #include <simdmath/divf4.h>
00057 #include <simdmath/floorf4.h>
00058 #include <simdmath/log2f4.h>
00059 #include <simdmath/powf4.h>
00060 #include <simdmath/sinf4.h>
00061 #include <simdmath/sqrtf4.h>
00062 #include <simdmath/truncf4.h>
00063 
00064 #include "pipe/p_compiler.h"
00065 #include "pipe/p_state.h"
00066 #include "pipe/p_shader_tokens.h"
00067 #include "tgsi/tgsi_parse.h"
00068 #include "tgsi/tgsi_util.h"
00069 #include "spu_exec.h"
00070 #include "spu_main.h"
00071 #include "spu_vertex_shader.h"
00072 #include "spu_dcache.h"
00073 #include "cell/common.h"
00074 
00075 #define TILE_TOP_LEFT     0
00076 #define TILE_TOP_RIGHT    1
00077 #define TILE_BOTTOM_LEFT  2
00078 #define TILE_BOTTOM_RIGHT 3
00079 
00080 /*
00081  * Shorthand locations of various utility registers (_I = Index, _C = Channel)
00082  */
00083 #define TEMP_0_I           TGSI_EXEC_TEMP_00000000_I
00084 #define TEMP_0_C           TGSI_EXEC_TEMP_00000000_C
00085 #define TEMP_7F_I          TGSI_EXEC_TEMP_7FFFFFFF_I
00086 #define TEMP_7F_C          TGSI_EXEC_TEMP_7FFFFFFF_C
00087 #define TEMP_80_I          TGSI_EXEC_TEMP_80000000_I
00088 #define TEMP_80_C          TGSI_EXEC_TEMP_80000000_C
00089 #define TEMP_FF_I          TGSI_EXEC_TEMP_FFFFFFFF_I
00090 #define TEMP_FF_C          TGSI_EXEC_TEMP_FFFFFFFF_C
00091 #define TEMP_1_I           TGSI_EXEC_TEMP_ONE_I
00092 #define TEMP_1_C           TGSI_EXEC_TEMP_ONE_C
00093 #define TEMP_2_I           TGSI_EXEC_TEMP_TWO_I
00094 #define TEMP_2_C           TGSI_EXEC_TEMP_TWO_C
00095 #define TEMP_128_I         TGSI_EXEC_TEMP_128_I
00096 #define TEMP_128_C         TGSI_EXEC_TEMP_128_C
00097 #define TEMP_M128_I        TGSI_EXEC_TEMP_MINUS_128_I
00098 #define TEMP_M128_C        TGSI_EXEC_TEMP_MINUS_128_C
00099 #define TEMP_KILMASK_I     TGSI_EXEC_TEMP_KILMASK_I
00100 #define TEMP_KILMASK_C     TGSI_EXEC_TEMP_KILMASK_C
00101 #define TEMP_OUTPUT_I      TGSI_EXEC_TEMP_OUTPUT_I
00102 #define TEMP_OUTPUT_C      TGSI_EXEC_TEMP_OUTPUT_C
00103 #define TEMP_PRIMITIVE_I   TGSI_EXEC_TEMP_PRIMITIVE_I
00104 #define TEMP_PRIMITIVE_C   TGSI_EXEC_TEMP_PRIMITIVE_C
00105 #define TEMP_R0            TGSI_EXEC_TEMP_R0
00106 
/*
 * Channel iteration helpers.
 *
 * FOR_EACH_CHANNEL steps CHAN over the four channel indices 0..3.
 * IS_CHANNEL_ENABLED / IS_CHANNEL_ENABLED2 test bit CHAN of the write mask
 * of destination register 0 / 1 of INST.  The FOR_EACH_ENABLED_* forms
 * combine both: the statement that follows runs only for enabled channels.
 *
 * CHAN is parenthesized everywhere so that an lvalue expression such as
 * *p can be passed safely (otherwise CHAN++ would bind as *(p++)).
 */
#define FOR_EACH_CHANNEL(CHAN)\
   for ((CHAN) = 0; (CHAN) < 4; (CHAN)++)

#define IS_CHANNEL_ENABLED(INST, CHAN)\
   ((INST).FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN)))

#define IS_CHANNEL_ENABLED2(INST, CHAN)\
   ((INST).FullDstRegisters[1].DstRegister.WriteMask & (1 << (CHAN)))

#define FOR_EACH_ENABLED_CHANNEL(INST, CHAN)\
   FOR_EACH_CHANNEL( CHAN )\
      if (IS_CHANNEL_ENABLED( INST, CHAN ))

#define FOR_EACH_ENABLED_CHANNEL2(INST, CHAN)\
   FOR_EACH_CHANNEL( CHAN )\
      if (IS_CHANNEL_ENABLED2( INST, CHAN ))
00123 
00124 
/*
 * Recompute MACH->ExecMask as the AND of the condition, loop, continue and
 * function masks.  MACH is a pointer expression; it and the whole expansion
 * are parenthesized so arguments such as (p + 1) or &m expand correctly.
 */
#define UPDATE_EXEC_MASK(MACH) \
      ((MACH)->ExecMask = (MACH)->CondMask & (MACH)->LoopMask & \
                          (MACH)->ContMask & (MACH)->FuncMask)
00128 
00129 
00130 #define CHAN_X  0
00131 #define CHAN_Y  1
00132 #define CHAN_Z  2
00133 #define CHAN_W  3
00134 
00135 
00136 
00142 void
00143 spu_exec_machine_init(struct spu_exec_machine *mach,
00144                       uint numSamplers,
00145                       struct spu_sampler *samplers,
00146                       unsigned processor)
00147 {
00148    const qword zero = si_il(0);
00149    const qword not_zero = si_il(~0);
00150 
00151    (void) numSamplers;
00152    mach->Samplers = samplers;
00153    mach->Processor = processor;
00154    mach->Addrs = &mach->Temps[TGSI_EXEC_NUM_TEMPS];
00155 
00156    /* Setup constants. */
00157    mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].q = zero;
00158    mach->Temps[TEMP_FF_I].xyzw[TEMP_FF_C].q = not_zero;
00159    mach->Temps[TEMP_7F_I].xyzw[TEMP_7F_C].q = si_shli(not_zero, -1);
00160    mach->Temps[TEMP_80_I].xyzw[TEMP_80_C].q = si_shli(not_zero, 31);
00161 
00162    mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].q = (qword) spu_splats(1.0f);
00163    mach->Temps[TEMP_2_I].xyzw[TEMP_2_C].q = (qword) spu_splats(2.0f);
00164    mach->Temps[TEMP_128_I].xyzw[TEMP_128_C].q = (qword) spu_splats(128.0f);
00165    mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C].q = (qword) spu_splats(-128.0f);
00166 }
00167 
00168 
00169 static INLINE qword
00170 micro_abs(qword src)
00171 {
00172    return si_rotmi(si_shli(src, 1), -1);
00173 }
00174 
00175 static INLINE qword
00176 micro_ceil(qword src)
00177 {
00178    return (qword) _ceilf4((vec_float4) src);
00179 }
00180 
00181 static INLINE qword
00182 micro_cos(qword src)
00183 {
00184    return (qword) _cosf4((vec_float4) src);
00185 }
00186 
00187 static const qword br_shuf = {
00188    TILE_BOTTOM_RIGHT + 0, TILE_BOTTOM_RIGHT + 1,
00189    TILE_BOTTOM_RIGHT + 2, TILE_BOTTOM_RIGHT + 3,
00190    TILE_BOTTOM_RIGHT + 0, TILE_BOTTOM_RIGHT + 1,
00191    TILE_BOTTOM_RIGHT + 2, TILE_BOTTOM_RIGHT + 3,
00192    TILE_BOTTOM_RIGHT + 0, TILE_BOTTOM_RIGHT + 1,
00193    TILE_BOTTOM_RIGHT + 2, TILE_BOTTOM_RIGHT + 3,
00194    TILE_BOTTOM_RIGHT + 0, TILE_BOTTOM_RIGHT + 1,
00195    TILE_BOTTOM_RIGHT + 2, TILE_BOTTOM_RIGHT + 3,
00196 };
00197 
00198 static const qword bl_shuf = {
00199    TILE_BOTTOM_LEFT + 0, TILE_BOTTOM_LEFT + 1,
00200    TILE_BOTTOM_LEFT + 2, TILE_BOTTOM_LEFT + 3,
00201    TILE_BOTTOM_LEFT + 0, TILE_BOTTOM_LEFT + 1,
00202    TILE_BOTTOM_LEFT + 2, TILE_BOTTOM_LEFT + 3,
00203    TILE_BOTTOM_LEFT + 0, TILE_BOTTOM_LEFT + 1,
00204    TILE_BOTTOM_LEFT + 2, TILE_BOTTOM_LEFT + 3,
00205    TILE_BOTTOM_LEFT + 0, TILE_BOTTOM_LEFT + 1,
00206    TILE_BOTTOM_LEFT + 2, TILE_BOTTOM_LEFT + 3,
00207 };
00208 
00209 static const qword tl_shuf = {
00210    TILE_TOP_LEFT + 0, TILE_TOP_LEFT + 1,
00211    TILE_TOP_LEFT + 2, TILE_TOP_LEFT + 3,
00212    TILE_TOP_LEFT + 0, TILE_TOP_LEFT + 1,
00213    TILE_TOP_LEFT + 2, TILE_TOP_LEFT + 3,
00214    TILE_TOP_LEFT + 0, TILE_TOP_LEFT + 1,
00215    TILE_TOP_LEFT + 2, TILE_TOP_LEFT + 3,
00216    TILE_TOP_LEFT + 0, TILE_TOP_LEFT + 1,
00217    TILE_TOP_LEFT + 2, TILE_TOP_LEFT + 3,
00218 };
00219 
00220 static qword
00221 micro_ddx(qword src)
00222 {
00223    qword bottom_right = si_shufb(src, src, br_shuf);
00224    qword bottom_left = si_shufb(src, src, bl_shuf);
00225 
00226    return si_fs(bottom_right, bottom_left);
00227 }
00228 
00229 static qword
00230 micro_ddy(qword src)
00231 {
00232    qword top_left = si_shufb(src, src, tl_shuf);
00233    qword bottom_left = si_shufb(src, src, bl_shuf);
00234 
00235    return si_fs(top_left, bottom_left);
00236 }
00237 
00238 static INLINE qword
00239 micro_div(qword src0, qword src1)
00240 {
00241    return (qword) _divf4((vec_float4) src0, (vec_float4) src1);
00242 }
00243 
00244 static qword
00245 micro_flr(qword src)
00246 {
00247    return (qword) _floorf4((vec_float4) src);
00248 }
00249 
00250 static qword
00251 micro_frc(qword src)
00252 {
00253    return si_fs(src, (qword) _floorf4((vec_float4) src));
00254 }
00255 
00256 static INLINE qword
00257 micro_ge(qword src0, qword src1)
00258 {
00259    return si_or(si_fceq(src0, src1), si_fcgt(src0, src1));
00260 }
00261 
00262 static qword
00263 micro_lg2(qword src)
00264 {
00265    return (qword) _log2f4((vec_float4) src);
00266 }
00267 
00268 static INLINE qword
00269 micro_lt(qword src0, qword src1)
00270 {
00271    const qword tmp = si_or(si_fceq(src0, src1), si_fcgt(src0, src1));
00272 
00273    return si_xori(tmp, 0xff);
00274 }
00275 
00276 static INLINE qword
00277 micro_max(qword src0, qword src1)
00278 {
00279    return si_selb(src1, src0, si_fcgt(src0, src1));
00280 }
00281 
00282 static INLINE qword
00283 micro_min(qword src0, qword src1)
00284 {
00285    return si_selb(src0, src1, si_fcgt(src0, src1));
00286 }
00287 
00288 static qword
00289 micro_neg(qword src)
00290 {
00291    return si_xor(src, (qword) spu_splats(0x80000000));
00292 }
00293 
00294 static qword
00295 micro_set_sign(qword src)
00296 {
00297    return si_or(src, (qword) spu_splats(0x80000000));
00298 }
00299 
00300 static qword
00301 micro_pow(qword src0, qword src1)
00302 {
00303    return (qword) _powf4((vec_float4) src0, (vec_float4) src1);
00304 }
00305 
00306 static qword
00307 micro_rnd(qword src)
00308 {
00309    const qword half = (qword) spu_splats(0.5f);
00310 
00311    /* May be able to use _roundf4.  There may be some difference, though.
00312     */
00313    return (qword) _floorf4((vec_float4) si_fa(src, half));
00314 }
00315 
00316 static INLINE qword
00317 micro_ishr(qword src0, qword src1)
00318 {
00319    return si_rotma(src0, si_sfi(src1, 0));
00320 }
00321 
00322 static qword
00323 micro_trunc(qword src)
00324 {
00325    return (qword) _truncf4((vec_float4) src);
00326 }
00327 
00328 static qword
00329 micro_sin(qword src)
00330 {
00331    return (qword) _sinf4((vec_float4) src);
00332 }
00333 
00334 static INLINE qword
00335 micro_sqrt(qword src)
00336 {
00337    return (qword) _sqrtf4((vec_float4) src);
00338 }
00339 
00340 static void
00341 fetch_src_file_channel(
00342    const struct spu_exec_machine *mach,
00343    const uint file,
00344    const uint swizzle,
00345    const union spu_exec_channel *index,
00346    union spu_exec_channel *chan )
00347 {
00348    switch( swizzle ) {
00349    case TGSI_EXTSWIZZLE_X:
00350    case TGSI_EXTSWIZZLE_Y:
00351    case TGSI_EXTSWIZZLE_Z:
00352    case TGSI_EXTSWIZZLE_W:
00353       switch( file ) {
00354       case TGSI_FILE_CONSTANT: {
00355          unsigned i;
00356 
00357          for (i = 0; i < 4; i++) {
00358             const float *ptr = mach->Consts[index->i[i]];
00359             float tmp[4];
00360 
00361             spu_dcache_fetch_unaligned((qword *) tmp,
00362                                        (uintptr_t)(ptr + swizzle),
00363                                        sizeof(float));
00364 
00365             chan->f[i] = tmp[0];
00366          }
00367          break;
00368       }
00369 
00370       case TGSI_FILE_INPUT:
00371          chan->u[0] = mach->Inputs[index->i[0]].xyzw[swizzle].u[0];
00372          chan->u[1] = mach->Inputs[index->i[1]].xyzw[swizzle].u[1];
00373          chan->u[2] = mach->Inputs[index->i[2]].xyzw[swizzle].u[2];
00374          chan->u[3] = mach->Inputs[index->i[3]].xyzw[swizzle].u[3];
00375          break;
00376 
00377       case TGSI_FILE_TEMPORARY:
00378          chan->u[0] = mach->Temps[index->i[0]].xyzw[swizzle].u[0];
00379          chan->u[1] = mach->Temps[index->i[1]].xyzw[swizzle].u[1];
00380          chan->u[2] = mach->Temps[index->i[2]].xyzw[swizzle].u[2];
00381          chan->u[3] = mach->Temps[index->i[3]].xyzw[swizzle].u[3];
00382          break;
00383 
00384       case TGSI_FILE_IMMEDIATE:
00385          ASSERT( index->i[0] < (int) mach->ImmLimit );
00386          ASSERT( index->i[1] < (int) mach->ImmLimit );
00387          ASSERT( index->i[2] < (int) mach->ImmLimit );
00388          ASSERT( index->i[3] < (int) mach->ImmLimit );
00389 
00390          chan->f[0] = mach->Imms[index->i[0]][swizzle];
00391          chan->f[1] = mach->Imms[index->i[1]][swizzle];
00392          chan->f[2] = mach->Imms[index->i[2]][swizzle];
00393          chan->f[3] = mach->Imms[index->i[3]][swizzle];
00394          break;
00395 
00396       case TGSI_FILE_ADDRESS:
00397          chan->u[0] = mach->Addrs[index->i[0]].xyzw[swizzle].u[0];
00398          chan->u[1] = mach->Addrs[index->i[1]].xyzw[swizzle].u[1];
00399          chan->u[2] = mach->Addrs[index->i[2]].xyzw[swizzle].u[2];
00400          chan->u[3] = mach->Addrs[index->i[3]].xyzw[swizzle].u[3];
00401          break;
00402 
00403       case TGSI_FILE_OUTPUT:
00404          /* vertex/fragment output vars can be read too */
00405          chan->u[0] = mach->Outputs[index->i[0]].xyzw[swizzle].u[0];
00406          chan->u[1] = mach->Outputs[index->i[1]].xyzw[swizzle].u[1];
00407          chan->u[2] = mach->Outputs[index->i[2]].xyzw[swizzle].u[2];
00408          chan->u[3] = mach->Outputs[index->i[3]].xyzw[swizzle].u[3];
00409          break;
00410 
00411       default:
00412          ASSERT( 0 );
00413       }
00414       break;
00415 
00416    case TGSI_EXTSWIZZLE_ZERO:
00417       *chan = mach->Temps[TEMP_0_I].xyzw[TEMP_0_C];
00418       break;
00419 
00420    case TGSI_EXTSWIZZLE_ONE:
00421       *chan = mach->Temps[TEMP_1_I].xyzw[TEMP_1_C];
00422       break;
00423 
00424    default:
00425       ASSERT( 0 );
00426    }
00427 }
00428 
00429 static void
00430 fetch_source(
00431    const struct spu_exec_machine *mach,
00432    union spu_exec_channel *chan,
00433    const struct tgsi_full_src_register *reg,
00434    const uint chan_index )
00435 {
00436    union spu_exec_channel index;
00437    uint swizzle;
00438 
00439    index.i[0] =
00440    index.i[1] =
00441    index.i[2] =
00442    index.i[3] = reg->SrcRegister.Index;
00443 
00444    if (reg->SrcRegister.Indirect) {
00445       union spu_exec_channel index2;
00446       union spu_exec_channel indir_index;
00447 
00448       index2.i[0] =
00449       index2.i[1] =
00450       index2.i[2] =
00451       index2.i[3] = reg->SrcRegisterInd.Index;
00452 
00453       swizzle = tgsi_util_get_src_register_swizzle(&reg->SrcRegisterInd,
00454                                                    CHAN_X);
00455       fetch_src_file_channel(
00456          mach,
00457          reg->SrcRegisterInd.File,
00458          swizzle,
00459          &index2,
00460          &indir_index );
00461 
00462       index.q = si_a(index.q, indir_index.q);
00463    }
00464 
00465    if( reg->SrcRegister.Dimension ) {
00466       switch( reg->SrcRegister.File ) {
00467       case TGSI_FILE_INPUT:
00468          index.q = si_mpyi(index.q, 17);
00469          break;
00470       case TGSI_FILE_CONSTANT:
00471          index.q = si_shli(index.q, 12);
00472          break;
00473       default:
00474          ASSERT( 0 );
00475       }
00476 
00477       index.i[0] += reg->SrcRegisterDim.Index;
00478       index.i[1] += reg->SrcRegisterDim.Index;
00479       index.i[2] += reg->SrcRegisterDim.Index;
00480       index.i[3] += reg->SrcRegisterDim.Index;
00481 
00482       if (reg->SrcRegisterDim.Indirect) {
00483          union spu_exec_channel index2;
00484          union spu_exec_channel indir_index;
00485 
00486          index2.i[0] =
00487          index2.i[1] =
00488          index2.i[2] =
00489          index2.i[3] = reg->SrcRegisterDimInd.Index;
00490 
00491          swizzle = tgsi_util_get_src_register_swizzle( &reg->SrcRegisterDimInd, CHAN_X );
00492          fetch_src_file_channel(
00493             mach,
00494             reg->SrcRegisterDimInd.File,
00495             swizzle,
00496             &index2,
00497             &indir_index );
00498 
00499          index.q = si_a(index.q, indir_index.q);
00500       }
00501    }
00502 
00503    swizzle = tgsi_util_get_full_src_register_extswizzle( reg, chan_index );
00504    fetch_src_file_channel(
00505       mach,
00506       reg->SrcRegister.File,
00507       swizzle,
00508       &index,
00509       chan );
00510 
00511    switch (tgsi_util_get_full_src_register_sign_mode( reg, chan_index )) {
00512    case TGSI_UTIL_SIGN_CLEAR:
00513       chan->q = micro_abs(chan->q);
00514       break;
00515 
00516    case TGSI_UTIL_SIGN_SET:
00517       chan->q = micro_set_sign(chan->q);
00518       break;
00519 
00520    case TGSI_UTIL_SIGN_TOGGLE:
00521       chan->q = micro_neg(chan->q);
00522       break;
00523 
00524    case TGSI_UTIL_SIGN_KEEP:
00525       break;
00526    }
00527 
00528    if (reg->SrcRegisterExtMod.Complement) {
00529       chan->q = si_fs(mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].q, chan->q);
00530    }
00531 }
00532 
00533 static void
00534 store_dest(
00535    struct spu_exec_machine *mach,
00536    const union spu_exec_channel *chan,
00537    const struct tgsi_full_dst_register *reg,
00538    const struct tgsi_full_instruction *inst,
00539    uint chan_index )
00540 {
00541    union spu_exec_channel *dst;
00542 
00543    switch( reg->DstRegister.File ) {
00544    case TGSI_FILE_NULL:
00545       return;
00546 
00547    case TGSI_FILE_OUTPUT:
00548       dst = &mach->Outputs[mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0]
00549                            + reg->DstRegister.Index].xyzw[chan_index];
00550       break;
00551 
00552    case TGSI_FILE_TEMPORARY:
00553       dst = &mach->Temps[reg->DstRegister.Index].xyzw[chan_index];
00554       break;
00555 
00556    case TGSI_FILE_ADDRESS:
00557       dst = &mach->Addrs[reg->DstRegister.Index].xyzw[chan_index];
00558       break;
00559 
00560    default:
00561       ASSERT( 0 );
00562       return;
00563    }
00564 
00565    switch (inst->Instruction.Saturate)
00566    {
00567    case TGSI_SAT_NONE:
00568       if (mach->ExecMask & 0x1)
00569          dst->i[0] = chan->i[0];
00570       if (mach->ExecMask & 0x2)
00571          dst->i[1] = chan->i[1];
00572       if (mach->ExecMask & 0x4)
00573          dst->i[2] = chan->i[2];
00574       if (mach->ExecMask & 0x8)
00575          dst->i[3] = chan->i[3];
00576       break;
00577 
00578    case TGSI_SAT_ZERO_ONE:
00579       /* XXX need to obey ExecMask here */
00580       dst->q = micro_max(chan->q, mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].q);
00581       dst->q = micro_min(dst->q, mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].q);
00582       break;
00583 
00584    case TGSI_SAT_MINUS_PLUS_ONE:
00585       ASSERT( 0 );
00586       break;
00587 
00588    default:
00589       ASSERT( 0 );
00590    }
00591 }
00592 
/*
 * Shorthands for use inside the exec_* routines.  Both expect variables
 * named "mach" and "inst" to be in scope at the point of use.
 *
 * FETCH reads channel CHAN of source register INDEX into VAL;
 * STORE writes VAL to channel CHAN of destination register INDEX.
 */
#define FETCH(VAL, INDEX, CHAN) \
   fetch_source(mach, (VAL), &inst->FullSrcRegisters[INDEX], (CHAN))

#define STORE(VAL, INDEX, CHAN) \
   store_dest(mach, (VAL), &inst->FullDstRegisters[INDEX], inst, (CHAN))
00598 
00599 
00604 static void
00605 exec_kil(struct spu_exec_machine *mach,
00606          const struct tgsi_full_instruction *inst)
00607 {
00608    uint uniquemask;
00609    uint chan_index;
00610    uint kilmask = 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */
00611    union spu_exec_channel r[1];
00612 
00613    /* This mask stores component bits that were already tested. Note that
00614     * we test if the value is less than zero, so 1.0 and 0.0 need not to be
00615     * tested. */
00616    uniquemask = (1 << TGSI_EXTSWIZZLE_ZERO) | (1 << TGSI_EXTSWIZZLE_ONE);
00617 
00618    for (chan_index = 0; chan_index < 4; chan_index++)
00619    {
00620       uint swizzle;
00621       uint i;
00622 
00623       /* unswizzle channel */
00624       swizzle = tgsi_util_get_full_src_register_extswizzle (
00625                         &inst->FullSrcRegisters[0],
00626                         chan_index);
00627 
00628       /* check if the component has not been already tested */
00629       if (uniquemask & (1 << swizzle))
00630          continue;
00631       uniquemask |= 1 << swizzle;
00632 
00633       FETCH(&r[0], 0, chan_index);
00634       for (i = 0; i < 4; i++)
00635          if (r[0].f[i] < 0.0f)
00636             kilmask |= 1 << i;
00637    }
00638 
00639    mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask;
00640 }
00641 
00646 static void
00647 exec_kilp(struct tgsi_exec_machine *mach,
00648           const struct tgsi_full_instruction *inst)
00649 {
00650    uint kilmask = 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */
00651 
00652    /* TODO: build kilmask from CC mask */
00653 
00654    mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask;
00655 }
00656 
00657 /*
00658  * Fetch a texel using STR texture coordinates.
00659  */
00660 static void
00661 fetch_texel( struct spu_sampler *sampler,
00662              const union spu_exec_channel *s,
00663              const union spu_exec_channel *t,
00664              const union spu_exec_channel *p,
00665              float lodbias,  /* XXX should be float[4] */
00666              union spu_exec_channel *r,
00667              union spu_exec_channel *g,
00668              union spu_exec_channel *b,
00669              union spu_exec_channel *a )
00670 {
00671    qword rgba[4];
00672    qword out[4];
00673 
00674    sampler->get_samples(sampler, s->f, t->f, p->f, lodbias, 
00675                         (float (*)[4]) rgba);
00676 
00677    _transpose_matrix4x4((vec_float4 *) out, (vec_float4 *) rgba);
00678    r->q = out[0];
00679    g->q = out[1];
00680    b->q = out[2];
00681    a->q = out[3];
00682 }
00683 
00684 
00685 static void
00686 exec_tex(struct spu_exec_machine *mach,
00687          const struct tgsi_full_instruction *inst,
00688          boolean biasLod, boolean projected)
00689 {
00690    const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index;
00691    union spu_exec_channel r[8];
00692    uint chan_index;
00693    float lodBias;
00694 
00695    /*   printf("Sampler %u unit %u\n", sampler, unit); */
00696 
00697    switch (inst->InstructionExtTexture.Texture) {
00698    case TGSI_TEXTURE_1D:
00699 
00700       FETCH(&r[0], 0, CHAN_X);
00701 
00702       if (projected) {
00703          FETCH(&r[1], 0, CHAN_W);
00704          r[0].q = micro_div(r[0].q, r[1].q);
00705       }
00706 
00707       if (biasLod) {
00708          FETCH(&r[1], 0, CHAN_W);
00709          lodBias = r[2].f[0];
00710       }
00711       else
00712          lodBias = 0.0;
00713 
00714       fetch_texel(&mach->Samplers[unit],
00715                   &r[0], NULL, NULL, lodBias,  /* S, T, P, BIAS */
00716                   &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */
00717       break;
00718 
00719    case TGSI_TEXTURE_2D:
00720    case TGSI_TEXTURE_RECT:
00721 
00722       FETCH(&r[0], 0, CHAN_X);
00723       FETCH(&r[1], 0, CHAN_Y);
00724       FETCH(&r[2], 0, CHAN_Z);
00725 
00726       if (projected) {
00727          FETCH(&r[3], 0, CHAN_W);
00728          r[0].q = micro_div(r[0].q, r[3].q);
00729          r[1].q = micro_div(r[1].q, r[3].q);
00730          r[2].q = micro_div(r[2].q, r[3].q);
00731       }
00732 
00733       if (biasLod) {
00734          FETCH(&r[3], 0, CHAN_W);
00735          lodBias = r[3].f[0];
00736       }
00737       else
00738          lodBias = 0.0;
00739 
00740       fetch_texel(&mach->Samplers[unit],
00741                   &r[0], &r[1], &r[2], lodBias,  /* inputs */
00742                   &r[0], &r[1], &r[2], &r[3]);  /* outputs */
00743       break;
00744 
00745    case TGSI_TEXTURE_3D:
00746    case TGSI_TEXTURE_CUBE:
00747 
00748       FETCH(&r[0], 0, CHAN_X);
00749       FETCH(&r[1], 0, CHAN_Y);
00750       FETCH(&r[2], 0, CHAN_Z);
00751 
00752       if (projected) {
00753          FETCH(&r[3], 0, CHAN_W);
00754          r[0].q = micro_div(r[0].q, r[3].q);
00755          r[1].q = micro_div(r[1].q, r[3].q);
00756          r[2].q = micro_div(r[2].q, r[3].q);
00757       }
00758 
00759       if (biasLod) {
00760          FETCH(&r[3], 0, CHAN_W);
00761          lodBias = r[3].f[0];
00762       }
00763       else
00764          lodBias = 0.0;
00765 
00766       fetch_texel(&mach->Samplers[unit],
00767                   &r[0], &r[1], &r[2], lodBias,
00768                   &r[0], &r[1], &r[2], &r[3]);
00769       break;
00770 
00771    default:
00772       ASSERT (0);
00773    }
00774 
00775    FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
00776       STORE( &r[chan_index], 0, chan_index );
00777    }
00778 }
00779 
00780 
00781 
00782 static void
00783 constant_interpolation(
00784    struct spu_exec_machine *mach,
00785    unsigned attrib,
00786    unsigned chan )
00787 {
00788    unsigned i;
00789 
00790    for( i = 0; i < QUAD_SIZE; i++ ) {
00791       mach->Inputs[attrib].xyzw[chan].f[i] = mach->InterpCoefs[attrib].a0[chan];
00792    }
00793 }
00794 
00795 static void
00796 linear_interpolation(
00797    struct spu_exec_machine *mach,
00798    unsigned attrib,
00799    unsigned chan )
00800 {
00801    const float x = mach->QuadPos.xyzw[0].f[0];
00802    const float y = mach->QuadPos.xyzw[1].f[0];
00803    const float dadx = mach->InterpCoefs[attrib].dadx[chan];
00804    const float dady = mach->InterpCoefs[attrib].dady[chan];
00805    const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y;
00806    mach->Inputs[attrib].xyzw[chan].f[0] = a0;
00807    mach->Inputs[attrib].xyzw[chan].f[1] = a0 + dadx;
00808    mach->Inputs[attrib].xyzw[chan].f[2] = a0 + dady;
00809    mach->Inputs[attrib].xyzw[chan].f[3] = a0 + dadx + dady;
00810 }
00811 
00812 static void
00813 perspective_interpolation(
00814    struct spu_exec_machine *mach,
00815    unsigned attrib,
00816    unsigned chan )
00817 {
00818    const float x = mach->QuadPos.xyzw[0].f[0];
00819    const float y = mach->QuadPos.xyzw[1].f[0];
00820    const float dadx = mach->InterpCoefs[attrib].dadx[chan];
00821    const float dady = mach->InterpCoefs[attrib].dady[chan];
00822    const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y;
00823    const float *w = mach->QuadPos.xyzw[3].f;
00824    /* divide by W here */
00825    mach->Inputs[attrib].xyzw[chan].f[0] = a0 / w[0];
00826    mach->Inputs[attrib].xyzw[chan].f[1] = (a0 + dadx) / w[1];
00827    mach->Inputs[attrib].xyzw[chan].f[2] = (a0 + dady) / w[2];
00828    mach->Inputs[attrib].xyzw[chan].f[3] = (a0 + dadx + dady) / w[3];
00829 }
00830 
00831 
/** Signature shared by the constant/linear/perspective interpolators. */
typedef void (*interpolation_func)(struct spu_exec_machine *mach,
                                   unsigned attrib,
                                   unsigned chan);
00836 
00837 static void
00838 exec_declaration(struct spu_exec_machine *mach,
00839                  const struct tgsi_full_declaration *decl)
00840 {
00841    if( mach->Processor == TGSI_PROCESSOR_FRAGMENT ) {
00842       if( decl->Declaration.File == TGSI_FILE_INPUT ) {
00843          unsigned first, last, mask;
00844          interpolation_func interp;
00845 
00846          first = decl->DeclarationRange.First;
00847          last = decl->DeclarationRange.Last;
00848          mask = decl->Declaration.UsageMask;
00849 
00850          switch( decl->Declaration.Interpolate ) {
00851          case TGSI_INTERPOLATE_CONSTANT:
00852             interp = constant_interpolation;
00853             break;
00854 
00855          case TGSI_INTERPOLATE_LINEAR:
00856             interp = linear_interpolation;
00857             break;
00858 
00859          case TGSI_INTERPOLATE_PERSPECTIVE:
00860             interp = perspective_interpolation;
00861             break;
00862 
00863          default:
00864             ASSERT( 0 );
00865          }
00866 
00867          if( mask == TGSI_WRITEMASK_XYZW ) {
00868             unsigned i, j;
00869 
00870             for( i = first; i <= last; i++ ) {
00871                for( j = 0; j < NUM_CHANNELS; j++ ) {
00872                   interp( mach, i, j );
00873                }
00874             }
00875          }
00876          else {
00877             unsigned i, j;
00878 
00879             for( j = 0; j < NUM_CHANNELS; j++ ) {
00880                if( mask & (1 << j) ) {
00881                   for( i = first; i <= last; i++ ) {
00882                      interp( mach, i, j );
00883                   }
00884                }
00885             }
00886          }
00887       }
00888    }
00889 }
00890 
00891 static void
00892 exec_instruction(
00893    struct spu_exec_machine *mach,
00894    const struct tgsi_full_instruction *inst,
00895    int *pc )
00896 {
00897    uint chan_index;
00898    union spu_exec_channel r[8];
00899 
00900    (*pc)++;
00901 
00902    switch (inst->Instruction.Opcode) {
00903    case TGSI_OPCODE_ARL:
00904       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
00905          FETCH( &r[0], 0, chan_index );
00906          r[0].q = si_cflts(r[0].q, 0);
00907          STORE( &r[0], 0, chan_index );
00908       }
00909       break;
00910 
00911    case TGSI_OPCODE_MOV:
00912    case TGSI_OPCODE_SWZ:
00913       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
00914          FETCH( &r[0], 0, chan_index );
00915          STORE( &r[0], 0, chan_index );
00916       }
00917       break;
00918 
00919    case TGSI_OPCODE_LIT:
00920       if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
00921          STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X );
00922       }
00923 
00924       if (IS_CHANNEL_ENABLED( *inst, CHAN_Y ) || IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
00925          FETCH( &r[0], 0, CHAN_X );
00926          if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
00927             r[0].q = micro_max(r[0].q, mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].q);
00928             STORE( &r[0], 0, CHAN_Y );
00929          }
00930 
00931          if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
00932             FETCH( &r[1], 0, CHAN_Y );
00933             r[1].q = micro_max(r[1].q, mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].q);
00934 
00935             FETCH( &r[2], 0, CHAN_W );
00936             r[2].q = micro_min(r[2].q, mach->Temps[TEMP_128_I].xyzw[TEMP_128_C].q);
00937             r[2].q = micro_max(r[2].q, mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C].q);
00938             r[1].q = micro_pow(r[1].q, r[2].q);
00939 
00940             /* r0 = (r0 > 0.0) ? r1 : 0.0
00941              */
00942             r[0].q = si_fcgt(r[0].q, mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].q);
00943             r[0].q = si_selb(mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].q, r[1].q,
00944                              r[0].q);
00945             STORE( &r[0], 0, CHAN_Z );
00946          }
00947       }
00948 
00949       if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
00950          STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
00951       }
00952       break;
00953 
00954    case TGSI_OPCODE_RCP:
00955    /* TGSI_OPCODE_RECIP */
00956       FETCH( &r[0], 0, CHAN_X );
00957       r[0].q = micro_div(mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].q, r[0].q);
00958       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
00959          STORE( &r[0], 0, chan_index );
00960       }
00961       break;
00962 
00963    case TGSI_OPCODE_RSQ:
00964    /* TGSI_OPCODE_RECIPSQRT */
00965       FETCH( &r[0], 0, CHAN_X );
00966       r[0].q = micro_sqrt(r[0].q);
00967       r[0].q = micro_div(mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].q, r[0].q);
00968       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
00969          STORE( &r[0], 0, chan_index );
00970       }
00971       break;
00972 
00973    case TGSI_OPCODE_EXP:
00974       ASSERT (0);
00975       break;
00976 
00977    case TGSI_OPCODE_LOG:
00978       ASSERT (0);
00979       break;
00980 
00981    case TGSI_OPCODE_MUL:
00982       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index )
00983       {
00984          FETCH(&r[0], 0, chan_index);
00985          FETCH(&r[1], 1, chan_index);
00986 
00987          r[0].q = si_fm(r[0].q, r[1].q);
00988 
00989          STORE(&r[0], 0, chan_index);
00990       }
00991       break;
00992 
00993    case TGSI_OPCODE_ADD:
00994       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
00995          FETCH( &r[0], 0, chan_index );
00996          FETCH( &r[1], 1, chan_index );
00997          r[0].q = si_fa(r[0].q, r[1].q);
00998          STORE( &r[0], 0, chan_index );
00999       }
01000       break;
01001 
01002    case TGSI_OPCODE_DP3:
01003    /* TGSI_OPCODE_DOT3 */
01004       FETCH( &r[0], 0, CHAN_X );
01005       FETCH( &r[1], 1, CHAN_X );
01006       r[0].q = si_fm(r[0].q, r[1].q);
01007 
01008       FETCH( &r[1], 0, CHAN_Y );
01009       FETCH( &r[2], 1, CHAN_Y );
01010       r[0].q = si_fma(r[1].q, r[2].q, r[0].q);
01011 
01012 
01013       FETCH( &r[1], 0, CHAN_Z );
01014       FETCH( &r[2], 1, CHAN_Z );
01015       r[0].q = si_fma(r[1].q, r[2].q, r[0].q);
01016 
01017       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
01018          STORE( &r[0], 0, chan_index );
01019       }
01020       break;
01021 
01022     case TGSI_OPCODE_DP4:
01023     /* TGSI_OPCODE_DOT4 */
01024        FETCH(&r[0], 0, CHAN_X);
01025        FETCH(&r[1], 1, CHAN_X);
01026 
01027       r[0].q = si_fm(r[0].q, r[1].q);
01028 
01029        FETCH(&r[1], 0, CHAN_Y);
01030        FETCH(&r[2], 1, CHAN_Y);
01031 
01032       r[0].q = si_fma(r[1].q, r[2].q, r[0].q);
01033 
01034        FETCH(&r[1], 0, CHAN_Z);
01035        FETCH(&r[2], 1, CHAN_Z);
01036 
01037       r[0].q = si_fma(r[1].q, r[2].q, r[0].q);
01038 
01039        FETCH(&r[1], 0, CHAN_W);
01040        FETCH(&r[2], 1, CHAN_W);
01041 
01042       r[0].q = si_fma(r[1].q, r[2].q, r[0].q);
01043 
01044       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
01045          STORE( &r[0], 0, chan_index );
01046       }
01047       break;
01048 
01049    case TGSI_OPCODE_DST:
01050       if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
01051          STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X );
01052       }
01053 
01054       if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
01055          FETCH( &r[0], 0, CHAN_Y );
01056          FETCH( &r[1], 1, CHAN_Y);
01057       r[0].q = si_fm(r[0].q, r[1].q);
01058          STORE( &r[0], 0, CHAN_Y );
01059       }
01060 
01061       if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
01062          FETCH( &r[0], 0, CHAN_Z );
01063          STORE( &r[0], 0, CHAN_Z );
01064       }
01065 
01066       if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
01067          FETCH( &r[0], 1, CHAN_W );
01068          STORE( &r[0], 0, CHAN_W );
01069       }
01070       break;
01071 
01072    case TGSI_OPCODE_MIN:
01073       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
01074          FETCH(&r[0], 0, chan_index);
01075          FETCH(&r[1], 1, chan_index);
01076 
01077          r[0].q = micro_min(r[0].q, r[1].q);
01078 
01079          STORE(&r[0], 0, chan_index);
01080       }
01081       break;
01082 
01083    case TGSI_OPCODE_MAX:
01084       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
01085          FETCH(&r[0], 0, chan_index);
01086          FETCH(&r[1], 1, chan_index);
01087 
01088          r[0].q = micro_max(r[0].q, r[1].q);
01089 
01090          STORE(&r[0], 0, chan_index );
01091       }
01092       break;
01093 
01094    case TGSI_OPCODE_SLT:
01095    /* TGSI_OPCODE_SETLT */
01096       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
01097          FETCH( &r[0], 0, chan_index );
01098          FETCH( &r[1], 1, chan_index );
01099 
01100          r[0].q = micro_ge(r[0].q, r[1].q);
01101          r[0].q = si_xori(r[0].q, 0xff);
01102 
01103          STORE( &r[0], 0, chan_index );
01104       }
01105       break;
01106 
01107    case TGSI_OPCODE_SGE:
01108    /* TGSI_OPCODE_SETGE */
01109       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
01110          FETCH( &r[0], 0, chan_index );
01111          FETCH( &r[1], 1, chan_index );
01112          r[0].q = micro_ge(r[0].q, r[1].q);
01113          STORE( &r[0], 0, chan_index );
01114       }
01115       break;
01116 
01117    case TGSI_OPCODE_MAD:
01118    /* TGSI_OPCODE_MADD */
01119       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
01120          FETCH( &r[0], 0, chan_index );
01121          FETCH( &r[1], 1, chan_index );
01122          FETCH( &r[2], 2, chan_index );
01123          r[0].q = si_fma(r[0].q, r[1].q, r[2].q);
01124          STORE( &r[0], 0, chan_index );
01125       }
01126       break;
01127 
01128    case TGSI_OPCODE_SUB:
01129       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
01130          FETCH(&r[0], 0, chan_index);
01131          FETCH(&r[1], 1, chan_index);
01132 
01133          r[0].q = si_fs(r[0].q, r[1].q);
01134 
01135          STORE(&r[0], 0, chan_index);
01136       }
01137       break;
01138 
01139    case TGSI_OPCODE_LERP:
01140    /* TGSI_OPCODE_LRP */
01141       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
01142          FETCH(&r[0], 0, chan_index);
01143          FETCH(&r[1], 1, chan_index);
01144          FETCH(&r[2], 2, chan_index);
01145 
01146          r[1].q = si_fs(r[1].q, r[2].q);
01147          r[0].q = si_fma(r[0].q, r[1].q, r[2].q);
01148 
01149          STORE(&r[0], 0, chan_index);
01150       }
01151       break;
01152 
01153    case TGSI_OPCODE_CND:
01154       ASSERT (0);
01155       break;
01156 
01157    case TGSI_OPCODE_CND0:
01158       ASSERT (0);
01159       break;
01160 
01161    case TGSI_OPCODE_DOT2ADD:
01162       /* TGSI_OPCODE_DP2A */
01163       ASSERT (0);
01164       break;
01165 
01166    case TGSI_OPCODE_INDEX:
01167       ASSERT (0);
01168       break;
01169 
01170    case TGSI_OPCODE_NEGATE:
01171       ASSERT (0);
01172       break;
01173 
01174    case TGSI_OPCODE_FRAC:
01175    /* TGSI_OPCODE_FRC */
01176       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
01177          FETCH( &r[0], 0, chan_index );
01178          r[0].q = micro_frc(r[0].q);
01179          STORE( &r[0], 0, chan_index );
01180       }
01181       break;
01182 
01183    case TGSI_OPCODE_CLAMP:
01184       ASSERT (0);
01185       break;
01186 
01187    case TGSI_OPCODE_FLOOR:
01188    /* TGSI_OPCODE_FLR */
01189       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
01190          FETCH( &r[0], 0, chan_index );
01191          r[0].q = micro_flr(r[0].q);
01192          STORE( &r[0], 0, chan_index );
01193       }
01194       break;
01195 
01196    case TGSI_OPCODE_ROUND:
01197       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
01198          FETCH( &r[0], 0, chan_index );
01199          r[0].q = micro_rnd(r[0].q);
01200          STORE( &r[0], 0, chan_index );
01201       }
01202       break;
01203 
01204    case TGSI_OPCODE_EXPBASE2:
01205     /* TGSI_OPCODE_EX2 */
01206       FETCH(&r[0], 0, CHAN_X);
01207 
01208       r[0].q = micro_pow(mach->Temps[TEMP_2_I].xyzw[TEMP_2_C].q, r[0].q);
01209 
01210       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
01211          STORE( &r[0], 0, chan_index );
01212       }
01213       break;
01214 
01215    case TGSI_OPCODE_LOGBASE2:
01216    /* TGSI_OPCODE_LG2 */
01217       FETCH( &r[0], 0, CHAN_X );
01218       r[0].q = micro_lg2(r[0].q);
01219       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
01220          STORE( &r[0], 0, chan_index );
01221       }
01222       break;
01223 
01224    case TGSI_OPCODE_POWER:
01225       /* TGSI_OPCODE_POW */
01226       FETCH(&r[0], 0, CHAN_X);
01227       FETCH(&r[1], 1, CHAN_X);
01228 
01229       r[0].q = micro_pow(r[0].q, r[1].q);
01230 
01231       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
01232          STORE( &r[0], 0, chan_index );
01233       }
01234       break;
01235 
01236    case TGSI_OPCODE_CROSSPRODUCT:
01237       /* TGSI_OPCODE_XPD */
01238       FETCH(&r[0], 0, CHAN_Y);
01239       FETCH(&r[1], 1, CHAN_Z);
01240       FETCH(&r[3], 0, CHAN_Z);
01241       FETCH(&r[4], 1, CHAN_Y);
01242 
01243       /* r2 = (r0 * r1) - (r3 * r5)
01244        */
01245       r[2].q = si_fm(r[3].q, r[5].q);
01246       r[2].q = si_fms(r[0].q, r[1].q, r[2].q);
01247 
01248       if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
01249          STORE( &r[2], 0, CHAN_X );
01250       }
01251 
01252       FETCH(&r[2], 1, CHAN_X);
01253       FETCH(&r[5], 0, CHAN_X);
01254 
01255       /* r3 = (r3 * r2) - (r1 * r5)
01256        */
01257       r[1].q = si_fm(r[1].q, r[5].q);
01258       r[3].q = si_fms(r[3].q, r[2].q, r[1].q);
01259 
01260       if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
01261          STORE( &r[3], 0, CHAN_Y );
01262       }
01263 
01264       /* r5 = (r5 * r4) - (r0 * r2)
01265        */
01266       r[0].q = si_fm(r[0].q, r[2].q);
01267       r[5].q = si_fms(r[5].q, r[4].q, r[0].q);
01268 
01269       if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
01270          STORE( &r[5], 0, CHAN_Z );
01271       }
01272 
01273       if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
01274          STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
01275       }
01276       break;
01277 
01278     case TGSI_OPCODE_MULTIPLYMATRIX:
01279        ASSERT (0);
01280        break;
01281 
01282     case TGSI_OPCODE_ABS:
01283        FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
01284           FETCH(&r[0], 0, chan_index);
01285 
01286           r[0].q = micro_abs(r[0].q);
01287 
01288           STORE(&r[0], 0, chan_index);
01289        }
01290        break;
01291 
01292    case TGSI_OPCODE_RCC:
01293       ASSERT (0);
01294       break;
01295 
01296    case TGSI_OPCODE_DPH:
01297       FETCH(&r[0], 0, CHAN_X);
01298       FETCH(&r[1], 1, CHAN_X);
01299 
01300       r[0].q = si_fm(r[0].q, r[1].q);
01301 
01302       FETCH(&r[1], 0, CHAN_Y);
01303       FETCH(&r[2], 1, CHAN_Y);
01304 
01305       r[0].q = si_fma(r[1].q, r[2].q, r[0].q);
01306 
01307       FETCH(&r[1], 0, CHAN_Z);
01308       FETCH(&r[2], 1, CHAN_Z);
01309 
01310       r[0].q = si_fma(r[1].q, r[2].q, r[0].q);
01311 
01312       FETCH(&r[1], 1, CHAN_W);
01313 
01314       r[0].q = si_fa(r[0].q, r[1].q);
01315 
01316       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
01317          STORE( &r[0], 0, chan_index );
01318       }
01319       break;
01320 
01321    case TGSI_OPCODE_COS:
01322       FETCH(&r[0], 0, CHAN_X);
01323 
01324       r[0].q = micro_cos(r[0].q);
01325 
01326       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
01327          STORE( &r[0], 0, chan_index );
01328       }
01329       break;
01330 
01331    case TGSI_OPCODE_DDX:
01332       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
01333          FETCH( &r[0], 0, chan_index );
01334          r[0].q = micro_ddx(r[0].q);
01335          STORE( &r[0], 0, chan_index );
01336       }
01337       break;
01338 
01339    case TGSI_OPCODE_DDY:
01340       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
01341          FETCH( &r[0], 0, chan_index );
01342          r[0].q = micro_ddy(r[0].q);
01343          STORE( &r[0], 0, chan_index );
01344       }
01345       break;
01346 
01347    case TGSI_OPCODE_KILP:
01348       exec_kilp (mach, inst);
01349       break;
01350 
01351    case TGSI_OPCODE_KIL:
01352       exec_kil (mach, inst);
01353       break;
01354 
01355    case TGSI_OPCODE_PK2H:
01356       ASSERT (0);
01357       break;
01358 
01359    case TGSI_OPCODE_PK2US:
01360       ASSERT (0);
01361       break;
01362 
01363    case TGSI_OPCODE_PK4B:
01364       ASSERT (0);
01365       break;
01366 
01367    case TGSI_OPCODE_PK4UB:
01368       ASSERT (0);
01369       break;
01370 
01371    case TGSI_OPCODE_RFL:
01372       ASSERT (0);
01373       break;
01374 
01375    case TGSI_OPCODE_SEQ:
01376       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
01377          FETCH( &r[0], 0, chan_index );
01378          FETCH( &r[1], 1, chan_index );
01379 
01380          r[0].q = si_fceq(r[0].q, r[1].q);
01381 
01382          STORE( &r[0], 0, chan_index );
01383       }
01384       break;
01385 
01386    case TGSI_OPCODE_SFL:
01387       ASSERT (0);
01388       break;
01389 
01390    case TGSI_OPCODE_SGT:
01391       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
01392          FETCH( &r[0], 0, chan_index );
01393          FETCH( &r[1], 1, chan_index );
01394          r[0].q = si_fcgt(r[0].q, r[1].q);
01395          STORE( &r[0], 0, chan_index );
01396       }
01397       break;
01398 
01399    case TGSI_OPCODE_SIN:
01400       FETCH( &r[0], 0, CHAN_X );
01401       r[0].q = micro_sin(r[0].q);
01402       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
01403          STORE( &r[0], 0, chan_index );
01404       }
01405       break;
01406 
01407    case TGSI_OPCODE_SLE:
01408       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
01409          FETCH( &r[0], 0, chan_index );
01410          FETCH( &r[1], 1, chan_index );
01411 
01412          r[0].q = si_fcgt(r[0].q, r[1].q);
01413          r[0].q = si_xori(r[0].q, 0xff);
01414 
01415          STORE( &r[0], 0, chan_index );
01416       }
01417       break;
01418 
01419    case TGSI_OPCODE_SNE:
01420       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
01421          FETCH( &r[0], 0, chan_index );
01422          FETCH( &r[1], 1, chan_index );
01423 
01424          r[0].q = si_fceq(r[0].q, r[1].q);
01425          r[0].q = si_xori(r[0].q, 0xff);
01426 
01427          STORE( &r[0], 0, chan_index );
01428       }
01429       break;
01430 
01431    case TGSI_OPCODE_STR:
01432       ASSERT (0);
01433       break;
01434 
01435    case TGSI_OPCODE_TEX:
01436       /* simple texture lookup */
01437       /* src[0] = texcoord */
01438       /* src[1] = sampler unit */
01439       exec_tex(mach, inst, FALSE, FALSE);
01440       break;
01441 
01442    case TGSI_OPCODE_TXB:
01443       /* Texture lookup with lod bias */
01444       /* src[0] = texcoord (src[0].w = lod bias) */
01445       /* src[1] = sampler unit */
01446       exec_tex(mach, inst, TRUE, FALSE);
01447       break;
01448 
01449    case TGSI_OPCODE_TXD:
01450       /* Texture lookup with explicit partial derivatives */
01451       /* src[0] = texcoord */
01452       /* src[1] = d[strq]/dx */
01453       /* src[2] = d[strq]/dy */
01454       /* src[3] = sampler unit */
01455       ASSERT (0);
01456       break;
01457 
01458    case TGSI_OPCODE_TXL:
01459       /* Texture lookup with explicit LOD */
01460       /* src[0] = texcoord (src[0].w = load bias) */
01461       /* src[1] = sampler unit */
01462       exec_tex(mach, inst, TRUE, FALSE);
01463       break;
01464 
01465    case TGSI_OPCODE_TXP:
01466       /* Texture lookup with projection */
01467       /* src[0] = texcoord (src[0].w = projection) */
01468       /* src[1] = sampler unit */
01469       exec_tex(mach, inst, TRUE, TRUE);
01470       break;
01471 
01472    case TGSI_OPCODE_UP2H:
01473       ASSERT (0);
01474       break;
01475 
01476    case TGSI_OPCODE_UP2US:
01477       ASSERT (0);
01478       break;
01479 
01480    case TGSI_OPCODE_UP4B:
01481       ASSERT (0);
01482       break;
01483 
01484    case TGSI_OPCODE_UP4UB:
01485       ASSERT (0);
01486       break;
01487 
01488    case TGSI_OPCODE_X2D:
01489       ASSERT (0);
01490       break;
01491 
01492    case TGSI_OPCODE_ARA:
01493       ASSERT (0);
01494       break;
01495 
01496    case TGSI_OPCODE_ARR:
01497       ASSERT (0);
01498       break;
01499 
01500    case TGSI_OPCODE_BRA:
01501       ASSERT (0);
01502       break;
01503 
01504    case TGSI_OPCODE_CAL:
01505       /* skip the call if no execution channels are enabled */
01506       if (mach->ExecMask) {
01507          /* do the call */
01508 
01509          /* push the Cond, Loop, Cont stacks */
01510          ASSERT(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
01511          mach->CondStack[mach->CondStackTop++] = mach->CondMask;
01512          ASSERT(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
01513          mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
01514          ASSERT(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
01515          mach->ContStack[mach->ContStackTop++] = mach->ContMask;
01516 
01517          ASSERT(mach->FuncStackTop < TGSI_EXEC_MAX_CALL_NESTING);
01518          mach->FuncStack[mach->FuncStackTop++] = mach->FuncMask;
01519 
01520          /* note that PC was already incremented above */
01521          mach->CallStack[mach->CallStackTop++] = *pc;
01522          *pc = inst->InstructionExtLabel.Label;
01523       }
01524       break;
01525 
01526    case TGSI_OPCODE_RET:
01527       mach->FuncMask &= ~mach->ExecMask;
01528       UPDATE_EXEC_MASK(mach);
01529 
01530       if (mach->ExecMask == 0x0) {
01531          /* really return now (otherwise, keep executing) */
01532 
01533          if (mach->CallStackTop == 0) {
01534             /* returning from main() */
01535             *pc = -1;
01536             return;
01537          }
01538          *pc = mach->CallStack[--mach->CallStackTop];
01539 
01540          /* pop the Cond, Loop, Cont stacks */
01541          ASSERT(mach->CondStackTop > 0);
01542          mach->CondMask = mach->CondStack[--mach->CondStackTop];
01543          ASSERT(mach->LoopStackTop > 0);
01544          mach->LoopMask = mach->LoopStack[--mach->LoopStackTop];
01545          ASSERT(mach->ContStackTop > 0);
01546          mach->ContMask = mach->ContStack[--mach->ContStackTop];
01547          ASSERT(mach->FuncStackTop > 0);
01548          mach->FuncMask = mach->FuncStack[--mach->FuncStackTop];
01549 
01550          UPDATE_EXEC_MASK(mach);
01551       }
01552       break;
01553 
01554    case TGSI_OPCODE_SSG:
01555       ASSERT (0);
01556       break;
01557 
01558    case TGSI_OPCODE_CMP:
01559       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
01560          FETCH(&r[0], 0, chan_index);
01561          FETCH(&r[1], 1, chan_index);
01562          FETCH(&r[2], 2, chan_index);
01563 
01564          /* r0 = (r0 < 0.0) ? r1 : r2
01565           */
01566          r[3].q = si_xor(r[3].q, r[3].q);
01567          r[0].q = micro_lt(r[0].q, r[3].q);
01568          r[0].q = si_selb(r[1].q, r[2].q, r[0].q);
01569 
01570          STORE(&r[0], 0, chan_index);
01571       }
01572       break;
01573 
01574    case TGSI_OPCODE_SCS:
01575       if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) || IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) {
01576          FETCH( &r[0], 0, CHAN_X );
01577       }
01578       if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) ) {
01579          r[1].q = micro_cos(r[0].q);
01580          STORE( &r[1], 0, CHAN_X );
01581       }
01582       if( IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) {
01583          r[1].q = micro_sin(r[0].q);
01584          STORE( &r[1], 0, CHAN_Y );
01585       }
01586       if( IS_CHANNEL_ENABLED( *inst, CHAN_Z ) ) {
01587          STORE( &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], 0, CHAN_Z );
01588       }
01589       if( IS_CHANNEL_ENABLED( *inst, CHAN_W ) ) {
01590          STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
01591       }
01592       break;
01593 
01594    case TGSI_OPCODE_NRM:
01595       ASSERT (0);
01596       break;
01597 
01598    case TGSI_OPCODE_DIV:
01599       ASSERT( 0 );
01600       break;
01601 
01602    case TGSI_OPCODE_DP2:
01603       FETCH( &r[0], 0, CHAN_X );
01604       FETCH( &r[1], 1, CHAN_X );
01605       r[0].q = si_fm(r[0].q, r[1].q);
01606 
01607       FETCH( &r[1], 0, CHAN_Y );
01608       FETCH( &r[2], 1, CHAN_Y );
01609       r[0].q = si_fma(r[1].q, r[2].q, r[0].q);
01610 
01611       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
01612          STORE( &r[0], 0, chan_index );
01613       }
01614       break;
01615 
01616    case TGSI_OPCODE_IF:
01617       /* push CondMask */
01618       ASSERT(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
01619       mach->CondStack[mach->CondStackTop++] = mach->CondMask;
01620       FETCH( &r[0], 0, CHAN_X );
01621       /* update CondMask */
01622       if( ! r[0].u[0] ) {
01623          mach->CondMask &= ~0x1;
01624       }
01625       if( ! r[0].u[1] ) {
01626          mach->CondMask &= ~0x2;
01627       }
01628       if( ! r[0].u[2] ) {
01629          mach->CondMask &= ~0x4;
01630       }
01631       if( ! r[0].u[3] ) {
01632          mach->CondMask &= ~0x8;
01633       }
01634       UPDATE_EXEC_MASK(mach);
01635       /* Todo: If CondMask==0, jump to ELSE */
01636       break;
01637 
01638    case TGSI_OPCODE_ELSE:
01639       /* invert CondMask wrt previous mask */
01640       {
01641          uint prevMask;
01642          ASSERT(mach->CondStackTop > 0);
01643          prevMask = mach->CondStack[mach->CondStackTop - 1];
01644          mach->CondMask = ~mach->CondMask & prevMask;
01645          UPDATE_EXEC_MASK(mach);
01646          /* Todo: If CondMask==0, jump to ENDIF */
01647       }
01648       break;
01649 
01650    case TGSI_OPCODE_ENDIF:
01651       /* pop CondMask */
01652       ASSERT(mach->CondStackTop > 0);
01653       mach->CondMask = mach->CondStack[--mach->CondStackTop];
01654       UPDATE_EXEC_MASK(mach);
01655       break;
01656 
01657    case TGSI_OPCODE_END:
01658       /* halt execution */
01659       *pc = -1;
01660       break;
01661 
01662    case TGSI_OPCODE_REP:
01663       ASSERT (0);
01664       break;
01665 
01666    case TGSI_OPCODE_ENDREP:
01667        ASSERT (0);
01668        break;
01669 
01670    case TGSI_OPCODE_PUSHA:
01671       ASSERT (0);
01672       break;
01673 
01674    case TGSI_OPCODE_POPA:
01675       ASSERT (0);
01676       break;
01677 
01678    case TGSI_OPCODE_CEIL:
01679       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
01680          FETCH( &r[0], 0, chan_index );
01681          r[0].q = micro_ceil(r[0].q);
01682          STORE( &r[0], 0, chan_index );
01683       }
01684       break;
01685 
01686    case TGSI_OPCODE_I2F:
01687       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
01688          FETCH( &r[0], 0, chan_index );
01689          r[0].q = si_csflt(r[0].q, 0);
01690          STORE( &r[0], 0, chan_index );
01691       }
01692       break;
01693 
01694    case TGSI_OPCODE_NOT:
01695       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
01696          FETCH( &r[0], 0, chan_index );
01697          r[0].q = si_xorbi(r[0].q, 0xff);
01698          STORE( &r[0], 0, chan_index );
01699       }
01700       break;
01701 
01702    case TGSI_OPCODE_TRUNC:
01703       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
01704          FETCH( &r[0], 0, chan_index );
01705          r[0].q = micro_trunc(r[0].q);
01706          STORE( &r[0], 0, chan_index );
01707       }
01708       break;
01709 
01710    case TGSI_OPCODE_SHL:
01711       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
01712          FETCH( &r[0], 0, chan_index );
01713          FETCH( &r[1], 1, chan_index );
01714 
01715          r[0].q = si_shl(r[0].q, r[1].q);
01716 
01717          STORE( &r[0], 0, chan_index );
01718       }
01719       break;
01720 
01721    case TGSI_OPCODE_SHR:
01722       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
01723          FETCH( &r[0], 0, chan_index );
01724          FETCH( &r[1], 1, chan_index );
01725          r[0].q = micro_ishr(r[0].q, r[1].q);
01726          STORE( &r[0], 0, chan_index );
01727       }
01728       break;
01729 
01730    case TGSI_OPCODE_AND:
01731       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
01732          FETCH( &r[0], 0, chan_index );
01733          FETCH( &r[1], 1, chan_index );
01734          r[0].q = si_and(r[0].q, r[1].q);
01735          STORE( &r[0], 0, chan_index );
01736       }
01737       break;
01738 
01739    case TGSI_OPCODE_OR:
01740       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
01741          FETCH( &r[0], 0, chan_index );
01742          FETCH( &r[1], 1, chan_index );
01743          r[0].q = si_or(r[0].q, r[1].q);
01744          STORE( &r[0], 0, chan_index );
01745       }
01746       break;
01747 
01748    case TGSI_OPCODE_MOD:
01749       ASSERT (0);
01750       break;
01751 
01752    case TGSI_OPCODE_XOR:
01753       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
01754          FETCH( &r[0], 0, chan_index );
01755          FETCH( &r[1], 1, chan_index );
01756          r[0].q = si_xor(r[0].q, r[1].q);
01757          STORE( &r[0], 0, chan_index );
01758       }
01759       break;
01760 
01761    case TGSI_OPCODE_SAD:
01762       ASSERT (0);
01763       break;
01764 
01765    case TGSI_OPCODE_TXF:
01766       ASSERT (0);
01767       break;
01768 
01769    case TGSI_OPCODE_TXQ:
01770       ASSERT (0);
01771       break;
01772 
01773    case TGSI_OPCODE_EMIT:
01774       mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] += 16;
01775       mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]]++;
01776       break;
01777 
01778    case TGSI_OPCODE_ENDPRIM:
01779       mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]++;
01780       mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]] = 0;
01781       break;
01782 
01783    case TGSI_OPCODE_LOOP:
01784       /* fall-through (for now) */
01785    case TGSI_OPCODE_BGNLOOP2:
01786       /* push LoopMask and ContMasks */
01787       ASSERT(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
01788       mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
01789       ASSERT(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
01790       mach->ContStack[mach->ContStackTop++] = mach->ContMask;
01791       break;
01792 
01793    case TGSI_OPCODE_ENDLOOP:
01794       /* fall-through (for now at least) */
01795    case TGSI_OPCODE_ENDLOOP2:
01796       /* Restore ContMask, but don't pop */
01797       ASSERT(mach->ContStackTop > 0);
01798       mach->ContMask = mach->ContStack[mach->ContStackTop - 1];
01799       if (mach->LoopMask) {
01800          /* repeat loop: jump to instruction just past BGNLOOP */
01801          *pc = inst->InstructionExtLabel.Label + 1;
01802       }
01803       else {
01804          /* exit loop: pop LoopMask */
01805          ASSERT(mach->LoopStackTop > 0);
01806          mach->LoopMask = mach->LoopStack[--mach->LoopStackTop];
01807          /* pop ContMask */
01808          ASSERT(mach->ContStackTop > 0);
01809          mach->ContMask = mach->ContStack[--mach->ContStackTop];
01810       }
01811       UPDATE_EXEC_MASK(mach);
01812       break;
01813 
01814    case TGSI_OPCODE_BRK:
01815       /* turn off loop channels for each enabled exec channel */
01816       mach->LoopMask &= ~mach->ExecMask;
01817       /* Todo: if mach->LoopMask == 0, jump to end of loop */
01818       UPDATE_EXEC_MASK(mach);
01819       break;
01820 
01821    case TGSI_OPCODE_CONT:
01822       /* turn off cont channels for each enabled exec channel */
01823       mach->ContMask &= ~mach->ExecMask;
01824       /* Todo: if mach->LoopMask == 0, jump to end of loop */
01825       UPDATE_EXEC_MASK(mach);
01826       break;
01827 
01828    case TGSI_OPCODE_BGNSUB:
01829       /* no-op */
01830       break;
01831 
01832    case TGSI_OPCODE_ENDSUB:
01833       /* no-op */
01834       break;
01835 
01836    case TGSI_OPCODE_NOISE1:
01837       ASSERT( 0 );
01838       break;
01839 
01840    case TGSI_OPCODE_NOISE2:
01841       ASSERT( 0 );
01842       break;
01843 
01844    case TGSI_OPCODE_NOISE3:
01845       ASSERT( 0 );
01846       break;
01847 
01848    case TGSI_OPCODE_NOISE4:
01849       ASSERT( 0 );
01850       break;
01851 
01852    case TGSI_OPCODE_NOP:
01853       break;
01854 
01855    default:
01856       ASSERT( 0 );
01857    }
01858 }
01859 
01860 
01865 uint
01866 spu_exec_machine_run( struct spu_exec_machine *mach )
01867 {
01868    uint i;
01869    int pc = 0;
01870 
01871    mach->CondMask = 0xf;
01872    mach->LoopMask = 0xf;
01873    mach->ContMask = 0xf;
01874    mach->FuncMask = 0xf;
01875    mach->ExecMask = 0xf;
01876 
01877    mach->CondStackTop = 0; /* temporarily subvert this ASSERTion */
01878    ASSERT(mach->CondStackTop == 0);
01879    ASSERT(mach->LoopStackTop == 0);
01880    ASSERT(mach->ContStackTop == 0);
01881    ASSERT(mach->CallStackTop == 0);
01882 
01883    mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] = 0;
01884    mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] = 0;
01885 
01886    if( mach->Processor == TGSI_PROCESSOR_GEOMETRY ) {
01887       mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0] = 0;
01888       mach->Primitives[0] = 0;
01889    }
01890 
01891 
01892    /* execute declarations (interpolants) */
01893    if( mach->Processor == TGSI_PROCESSOR_FRAGMENT ) {
01894       for (i = 0; i < mach->NumDeclarations; i++) {
01895          union {
01896             struct tgsi_full_declaration decl;
01897             qword buffer[ROUNDUP16(sizeof(struct tgsi_full_declaration)) / 16];
01898          } d ALIGN16_ATTRIB;
01899          unsigned ea = (unsigned) (mach->Declarations + pc);
01900 
01901          spu_dcache_fetch_unaligned(d.buffer, ea, sizeof(d.decl));
01902 
01903          exec_declaration( mach, &d.decl );
01904       }
01905    }
01906 
01907    /* execute instructions, until pc is set to -1 */
01908    while (pc != -1) {
01909       union {
01910          struct tgsi_full_instruction inst;
01911          qword buffer[ROUNDUP16(sizeof(struct tgsi_full_instruction)) / 16];
01912       } i ALIGN16_ATTRIB;
01913       unsigned ea = (unsigned) (mach->Instructions + pc);
01914 
01915       spu_dcache_fetch_unaligned(i.buffer, ea, sizeof(i.inst));
01916       exec_instruction( mach, & i.inst, &pc );
01917    }
01918 
01919 #if 0
01920    /* we scale from floats in [0,1] to Zbuffer ints in sp_quad_depth_test.c */
01921    if (mach->Processor == TGSI_PROCESSOR_FRAGMENT) {
01922       /*
01923        * Scale back depth component.
01924        */
01925       for (i = 0; i < 4; i++)
01926          mach->Outputs[0].xyzw[2].f[i] *= ctx->DrawBuffer->_DepthMaxF;
01927    }
01928 #endif
01929 
01930    return ~mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0];
01931 }
01932 
01933 

Generated on Tue Sep 29 06:25:16 2009 for Gallium3D by  doxygen 1.5.4