spu_exec.c File Reference

Include dependency graph for spu_exec.c:

Go to the source code of this file.

Defines

#define TILE_TOP_LEFT   0
 TGSI interpreter/executor.
#define TILE_TOP_RIGHT   1
#define TILE_BOTTOM_LEFT   2
#define TILE_BOTTOM_RIGHT   3
#define TEMP_0_I   TGSI_EXEC_TEMP_00000000_I
#define TEMP_0_C   TGSI_EXEC_TEMP_00000000_C
#define TEMP_7F_I   TGSI_EXEC_TEMP_7FFFFFFF_I
#define TEMP_7F_C   TGSI_EXEC_TEMP_7FFFFFFF_C
#define TEMP_80_I   TGSI_EXEC_TEMP_80000000_I
#define TEMP_80_C   TGSI_EXEC_TEMP_80000000_C
#define TEMP_FF_I   TGSI_EXEC_TEMP_FFFFFFFF_I
#define TEMP_FF_C   TGSI_EXEC_TEMP_FFFFFFFF_C
#define TEMP_1_I   TGSI_EXEC_TEMP_ONE_I
#define TEMP_1_C   TGSI_EXEC_TEMP_ONE_C
#define TEMP_2_I   TGSI_EXEC_TEMP_TWO_I
#define TEMP_2_C   TGSI_EXEC_TEMP_TWO_C
#define TEMP_128_I   TGSI_EXEC_TEMP_128_I
#define TEMP_128_C   TGSI_EXEC_TEMP_128_C
#define TEMP_M128_I   TGSI_EXEC_TEMP_MINUS_128_I
#define TEMP_M128_C   TGSI_EXEC_TEMP_MINUS_128_C
#define TEMP_KILMASK_I   TGSI_EXEC_TEMP_KILMASK_I
#define TEMP_KILMASK_C   TGSI_EXEC_TEMP_KILMASK_C
#define TEMP_OUTPUT_I   TGSI_EXEC_TEMP_OUTPUT_I
#define TEMP_OUTPUT_C   TGSI_EXEC_TEMP_OUTPUT_C
#define TEMP_PRIMITIVE_I   TGSI_EXEC_TEMP_PRIMITIVE_I
#define TEMP_PRIMITIVE_C   TGSI_EXEC_TEMP_PRIMITIVE_C
#define TEMP_R0   TGSI_EXEC_TEMP_R0
#define FOR_EACH_CHANNEL(CHAN)   for (CHAN = 0; CHAN < 4; CHAN++)
#define IS_CHANNEL_ENABLED(INST, CHAN)   ((INST).FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN)))
#define IS_CHANNEL_ENABLED2(INST, CHAN)   ((INST).FullDstRegisters[1].DstRegister.WriteMask & (1 << (CHAN)))
#define FOR_EACH_ENABLED_CHANNEL(INST, CHAN)
#define FOR_EACH_ENABLED_CHANNEL2(INST, CHAN)
#define UPDATE_EXEC_MASK(MACH)   MACH->ExecMask = MACH->CondMask & MACH->LoopMask & MACH->ContMask & MACH->FuncMask
 The execution mask is the bitwise AND of the conditional, loop, continue and function masks.
#define CHAN_X   0
#define CHAN_Y   1
#define CHAN_Z   2
#define CHAN_W   3
#define FETCH(VAL, INDEX, CHAN)   fetch_source (mach, VAL, &inst->FullSrcRegisters[INDEX], CHAN)
#define STORE(VAL, INDEX, CHAN)   store_dest (mach, VAL, &inst->FullDstRegisters[INDEX], inst, CHAN )

Typedefs

typedef void(* interpolation_func )(struct spu_exec_machine *mach, unsigned attrib, unsigned chan)

Functions

void spu_exec_machine_init (struct spu_exec_machine *mach, uint numSamplers, struct spu_sampler *samplers, unsigned processor)
 Initialize machine state by expanding tokens to full instructions, allocating temporary storage, setting up constants, etc.
static qword micro_abs (qword src)
static qword micro_ceil (qword src)
static qword micro_cos (qword src)
static qword micro_ddx (qword src)
static qword micro_ddy (qword src)
static qword micro_div (qword src0, qword src1)
static qword micro_flr (qword src)
static qword micro_frc (qword src)
static qword micro_ge (qword src0, qword src1)
static qword micro_lg2 (qword src)
static qword micro_lt (qword src0, qword src1)
static qword micro_max (qword src0, qword src1)
static qword micro_min (qword src0, qword src1)
static qword micro_neg (qword src)
static qword micro_set_sign (qword src)
static qword micro_pow (qword src0, qword src1)
static qword micro_rnd (qword src)
static qword micro_ishr (qword src0, qword src1)
static qword micro_trunc (qword src)
static qword micro_sin (qword src)
static qword micro_sqrt (qword src)
static void fetch_src_file_channel (const struct spu_exec_machine *mach, const uint file, const uint swizzle, const union spu_exec_channel *index, union spu_exec_channel *chan)
static void fetch_source (const struct spu_exec_machine *mach, union spu_exec_channel *chan, const struct tgsi_full_src_register *reg, const uint chan_index)
static void store_dest (struct spu_exec_machine *mach, const union spu_exec_channel *chan, const struct tgsi_full_dst_register *reg, const struct tgsi_full_instruction *inst, uint chan_index)
static void exec_kil (struct spu_exec_machine *mach, const struct tgsi_full_instruction *inst)
 Execute ARB-style KIL which is predicated by a src register.
static void exec_kilp (struct tgsi_exec_machine *mach, const struct tgsi_full_instruction *inst)
 Execute NVIDIA-style KIL which is predicated by a condition code.
static void fetch_texel (struct spu_sampler *sampler, const union spu_exec_channel *s, const union spu_exec_channel *t, const union spu_exec_channel *p, float lodbias, union spu_exec_channel *r, union spu_exec_channel *g, union spu_exec_channel *b, union spu_exec_channel *a)
static void exec_tex (struct spu_exec_machine *mach, const struct tgsi_full_instruction *inst, boolean biasLod, boolean projected)
static void constant_interpolation (struct spu_exec_machine *mach, unsigned attrib, unsigned chan)
static void linear_interpolation (struct spu_exec_machine *mach, unsigned attrib, unsigned chan)
static void perspective_interpolation (struct spu_exec_machine *mach, unsigned attrib, unsigned chan)
static void exec_declaration (struct spu_exec_machine *mach, const struct tgsi_full_declaration *decl)
static void exec_instruction (struct spu_exec_machine *mach, const struct tgsi_full_instruction *inst, int *pc)
uint spu_exec_machine_run (struct spu_exec_machine *mach)
 Run TGSI interpreter.

Variables

static const qword br_shuf
static const qword bl_shuf
static const qword tl_shuf


Define Documentation

#define CHAN_W   3

Definition at line 133 of file spu_exec.c.

#define CHAN_X   0

Definition at line 130 of file spu_exec.c.

#define CHAN_Y   1

Definition at line 131 of file spu_exec.c.

#define CHAN_Z   2

Definition at line 132 of file spu_exec.c.

#define FETCH ( VAL,
INDEX,
CHAN   )     fetch_source (mach, VAL, &inst->FullSrcRegisters[INDEX], CHAN)

Definition at line 593 of file spu_exec.c.

#define FOR_EACH_CHANNEL ( CHAN   )     for (CHAN = 0; CHAN < 4; CHAN++)

Definition at line 107 of file spu_exec.c.

#define FOR_EACH_ENABLED_CHANNEL ( INST,
CHAN   ) 

Value:

FOR_EACH_CHANNEL( CHAN )\
      if (IS_CHANNEL_ENABLED( INST, CHAN ))

Definition at line 116 of file spu_exec.c.

#define FOR_EACH_ENABLED_CHANNEL2 ( INST,
CHAN   ) 

Value:

FOR_EACH_CHANNEL( CHAN )\
      if (IS_CHANNEL_ENABLED2( INST, CHAN ))

Definition at line 120 of file spu_exec.c.

#define IS_CHANNEL_ENABLED ( INST,
CHAN   )     ((INST).FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN)))

Definition at line 110 of file spu_exec.c.

#define IS_CHANNEL_ENABLED2 ( INST,
CHAN   )     ((INST).FullDstRegisters[1].DstRegister.WriteMask & (1 << (CHAN)))

Definition at line 113 of file spu_exec.c.

#define STORE ( VAL,
INDEX,
CHAN   )     store_dest (mach, VAL, &inst->FullDstRegisters[INDEX], inst, CHAN )

Definition at line 596 of file spu_exec.c.

#define TEMP_0_C   TGSI_EXEC_TEMP_00000000_C

Definition at line 84 of file spu_exec.c.

#define TEMP_0_I   TGSI_EXEC_TEMP_00000000_I

Definition at line 83 of file spu_exec.c.

#define TEMP_128_C   TGSI_EXEC_TEMP_128_C

Definition at line 96 of file spu_exec.c.

#define TEMP_128_I   TGSI_EXEC_TEMP_128_I

Definition at line 95 of file spu_exec.c.

#define TEMP_1_C   TGSI_EXEC_TEMP_ONE_C

Definition at line 92 of file spu_exec.c.

#define TEMP_1_I   TGSI_EXEC_TEMP_ONE_I

Definition at line 91 of file spu_exec.c.

#define TEMP_2_C   TGSI_EXEC_TEMP_TWO_C

Definition at line 94 of file spu_exec.c.

#define TEMP_2_I   TGSI_EXEC_TEMP_TWO_I

Definition at line 93 of file spu_exec.c.

#define TEMP_7F_C   TGSI_EXEC_TEMP_7FFFFFFF_C

Definition at line 86 of file spu_exec.c.

#define TEMP_7F_I   TGSI_EXEC_TEMP_7FFFFFFF_I

Definition at line 85 of file spu_exec.c.

#define TEMP_80_C   TGSI_EXEC_TEMP_80000000_C

Definition at line 88 of file spu_exec.c.

#define TEMP_80_I   TGSI_EXEC_TEMP_80000000_I

Definition at line 87 of file spu_exec.c.

#define TEMP_FF_C   TGSI_EXEC_TEMP_FFFFFFFF_C

Definition at line 90 of file spu_exec.c.

#define TEMP_FF_I   TGSI_EXEC_TEMP_FFFFFFFF_I

Definition at line 89 of file spu_exec.c.

#define TEMP_KILMASK_C   TGSI_EXEC_TEMP_KILMASK_C

Definition at line 100 of file spu_exec.c.

#define TEMP_KILMASK_I   TGSI_EXEC_TEMP_KILMASK_I

Definition at line 99 of file spu_exec.c.

#define TEMP_M128_C   TGSI_EXEC_TEMP_MINUS_128_C

Definition at line 98 of file spu_exec.c.

#define TEMP_M128_I   TGSI_EXEC_TEMP_MINUS_128_I

Definition at line 97 of file spu_exec.c.

#define TEMP_OUTPUT_C   TGSI_EXEC_TEMP_OUTPUT_C

Definition at line 102 of file spu_exec.c.

#define TEMP_OUTPUT_I   TGSI_EXEC_TEMP_OUTPUT_I

Definition at line 101 of file spu_exec.c.

#define TEMP_PRIMITIVE_C   TGSI_EXEC_TEMP_PRIMITIVE_C

Definition at line 104 of file spu_exec.c.

#define TEMP_PRIMITIVE_I   TGSI_EXEC_TEMP_PRIMITIVE_I

Definition at line 103 of file spu_exec.c.

#define TEMP_R0   TGSI_EXEC_TEMP_R0

Definition at line 105 of file spu_exec.c.

#define TILE_BOTTOM_LEFT   2

Definition at line 77 of file spu_exec.c.

#define TILE_BOTTOM_RIGHT   3

Definition at line 78 of file spu_exec.c.

#define TILE_TOP_LEFT   0

TGSI interpreter/executor.

Flow control information:

Since we operate on 'quads' (4 pixels or 4 vertices in parallel) flow control statements (IF/ELSE/ENDIF, LOOP/ENDLOOP) require special care since a condition may be true for some quad components but false for other components.

We basically execute all statements (even if they're in the part of an IF/ELSE clause that's "not taken") and use a special mask to control writing to destination registers. This is the ExecMask. See store_dest().

The ExecMask is computed from the CondMask, LoopMask and ContMask, which are controlled by the flow control instructions (namely IF/ELSE/ENDIF, LOOP/ENDLOOP and CONT); the UPDATE_EXEC_MASK macro additionally ANDs in FuncMask.

Authors: Michal Krol, Brian Paul

Definition at line 75 of file spu_exec.c.

#define TILE_TOP_RIGHT   1

Definition at line 76 of file spu_exec.c.

#define UPDATE_EXEC_MASK ( MACH   )     MACH->ExecMask = MACH->CondMask & MACH->LoopMask & MACH->ContMask & MACH->FuncMask

The execution mask is the bitwise AND of the conditional, loop, continue and function masks.

Definition at line 126 of file spu_exec.c.


Typedef Documentation

typedef void(* interpolation_func)(struct spu_exec_machine *mach, unsigned attrib, unsigned chan)

Definition at line 832 of file spu_exec.c.


Function Documentation

static void constant_interpolation ( struct spu_exec_machine *  mach,
unsigned  attrib,
unsigned  chan 
) [static]

Definition at line 783 of file spu_exec.c.

References spu_interp_coef::a0, spu_exec_channel::f, spu_exec_channel::i, spu_exec_machine::Inputs, spu_exec_machine::InterpCoefs, QUAD_SIZE, and spu_exec_vector::xyzw.

00787 {
00788    unsigned i;
00789 
00790    for( i = 0; i < QUAD_SIZE; i++ ) {
00791       mach->Inputs[attrib].xyzw[chan].f[i] = mach->InterpCoefs[attrib].a0[chan];
00792    }
00793 }

static void exec_declaration ( struct spu_exec_machine *  mach,
const struct tgsi_full_declaration *  decl 
) [static]

Definition at line 838 of file spu_exec.c.

References ASSERT, constant_interpolation(), tgsi_full_declaration::Declaration, tgsi_full_declaration::DeclarationRange, tgsi_declaration::File, tgsi_declaration_range::First, interp(), tgsi_declaration::Interpolate, tgsi_declaration_range::Last, linear_interpolation(), NUM_CHANNELS, perspective_interpolation(), spu_exec_machine::Processor, TGSI_FILE_INPUT, TGSI_INTERPOLATE_CONSTANT, TGSI_INTERPOLATE_LINEAR, TGSI_INTERPOLATE_PERSPECTIVE, TGSI_PROCESSOR_FRAGMENT, TGSI_WRITEMASK_XYZW, and tgsi_declaration::UsageMask.

00840 {
00841    if( mach->Processor == TGSI_PROCESSOR_FRAGMENT ) {
00842       if( decl->Declaration.File == TGSI_FILE_INPUT ) {
00843          unsigned first, last, mask;
00844          interpolation_func interp;
00845 
00846          first = decl->DeclarationRange.First;
00847          last = decl->DeclarationRange.Last;
00848          mask = decl->Declaration.UsageMask;
00849 
00850          switch( decl->Declaration.Interpolate ) {
00851          case TGSI_INTERPOLATE_CONSTANT:
00852             interp = constant_interpolation;
00853             break;
00854 
00855          case TGSI_INTERPOLATE_LINEAR:
00856             interp = linear_interpolation;
00857             break;
00858 
00859          case TGSI_INTERPOLATE_PERSPECTIVE:
00860             interp = perspective_interpolation;
00861             break;
00862 
00863          default:
00864             ASSERT( 0 );
00865          }
00866 
00867          if( mask == TGSI_WRITEMASK_XYZW ) {
00868             unsigned i, j;
00869 
00870             for( i = first; i <= last; i++ ) {
00871                for( j = 0; j < NUM_CHANNELS; j++ ) {
00872                   interp( mach, i, j );
00873                }
00874             }
00875          }
00876          else {
00877             unsigned i, j;
00878 
00879             for( j = 0; j < NUM_CHANNELS; j++ ) {
00880                if( mask & (1 << j) ) {
00881                   for( i = first; i <= last; i++ ) {
00882                      interp( mach, i, j );
00883                   }
00884                }
00885             }
00886          }
00887       }
00888    }
00889 }

static void exec_instruction ( struct spu_exec_machine *  mach,
const struct tgsi_full_instruction *  inst,
int *  pc 
) [static]

Definition at line 892 of file spu_exec.c.

References ASSERT, spu_exec_machine::CallStack, spu_exec_machine::CallStackTop, CHAN_W, CHAN_X, CHAN_Y, CHAN_Z, spu_exec_machine::CondMask, spu_exec_machine::CondStack, spu_exec_machine::CondStackTop, spu_exec_machine::ContMask, spu_exec_machine::ContStack, spu_exec_machine::ContStackTop, exec_kil(), exec_kilp(), exec_tex(), spu_exec_machine::ExecMask, FALSE, FETCH, FOR_EACH_ENABLED_CHANNEL, spu_exec_machine::FuncMask, spu_exec_machine::FuncStack, spu_exec_machine::FuncStackTop, tgsi_full_instruction::Instruction, tgsi_full_instruction::InstructionExtLabel, IS_CHANNEL_ENABLED, tgsi_instruction_ext_label::Label, spu_exec_machine::LoopMask, spu_exec_machine::LoopStack, spu_exec_machine::LoopStackTop, micro_abs(), micro_ceil(), micro_cos(), micro_ddx(), micro_ddy(), micro_div(), micro_flr(), micro_frc(), micro_ge(), micro_ishr(), micro_lg2(), micro_lt(), micro_max(), micro_min(), micro_pow(), micro_rnd(), micro_sin(), micro_sqrt(), micro_trunc(), tgsi_instruction::Opcode, spu_exec_machine::Primitives, spu_exec_channel::q, STORE, TEMP_0_C, TEMP_0_I, TEMP_128_C, TEMP_128_I, TEMP_1_C, TEMP_1_I, TEMP_2_C, TEMP_2_I, TEMP_M128_C, TEMP_M128_I, TEMP_OUTPUT_C, TEMP_OUTPUT_I, TEMP_PRIMITIVE_C, TEMP_PRIMITIVE_I, TGSI_EXEC_MAX_CALL_NESTING, TGSI_EXEC_MAX_COND_NESTING, TGSI_EXEC_MAX_LOOP_NESTING, TGSI_OPCODE_ABS, TGSI_OPCODE_ADD, TGSI_OPCODE_AND, TGSI_OPCODE_ARA, TGSI_OPCODE_ARL, TGSI_OPCODE_ARR, TGSI_OPCODE_BGNLOOP2, TGSI_OPCODE_BGNSUB, TGSI_OPCODE_BRA, TGSI_OPCODE_BRK, TGSI_OPCODE_CAL, TGSI_OPCODE_CEIL, TGSI_OPCODE_CLAMP, TGSI_OPCODE_CMP, TGSI_OPCODE_CND, TGSI_OPCODE_CND0, TGSI_OPCODE_CONT, TGSI_OPCODE_COS, TGSI_OPCODE_CROSSPRODUCT, TGSI_OPCODE_DDX, TGSI_OPCODE_DDY, TGSI_OPCODE_DIV, TGSI_OPCODE_DOT2ADD, TGSI_OPCODE_DP2, TGSI_OPCODE_DP3, TGSI_OPCODE_DP4, TGSI_OPCODE_DPH, TGSI_OPCODE_DST, TGSI_OPCODE_ELSE, TGSI_OPCODE_EMIT, TGSI_OPCODE_END, TGSI_OPCODE_ENDIF, TGSI_OPCODE_ENDLOOP, TGSI_OPCODE_ENDLOOP2, TGSI_OPCODE_ENDPRIM, TGSI_OPCODE_ENDREP, TGSI_OPCODE_ENDSUB, TGSI_OPCODE_EXP, 
TGSI_OPCODE_EXPBASE2, TGSI_OPCODE_FLOOR, TGSI_OPCODE_FRAC, TGSI_OPCODE_I2F, TGSI_OPCODE_IF, TGSI_OPCODE_INDEX, TGSI_OPCODE_KIL, TGSI_OPCODE_KILP, TGSI_OPCODE_LERP, TGSI_OPCODE_LIT, TGSI_OPCODE_LOG, TGSI_OPCODE_LOGBASE2, TGSI_OPCODE_LOOP, TGSI_OPCODE_MAD, TGSI_OPCODE_MAX, TGSI_OPCODE_MIN, TGSI_OPCODE_MOD, TGSI_OPCODE_MOV, TGSI_OPCODE_MUL, TGSI_OPCODE_MULTIPLYMATRIX, TGSI_OPCODE_NEGATE, TGSI_OPCODE_NOISE1, TGSI_OPCODE_NOISE2, TGSI_OPCODE_NOISE3, TGSI_OPCODE_NOISE4, TGSI_OPCODE_NOP, TGSI_OPCODE_NOT, TGSI_OPCODE_NRM, TGSI_OPCODE_OR, TGSI_OPCODE_PK2H, TGSI_OPCODE_PK2US, TGSI_OPCODE_PK4B, TGSI_OPCODE_PK4UB, TGSI_OPCODE_POPA, TGSI_OPCODE_POWER, TGSI_OPCODE_PUSHA, TGSI_OPCODE_RCC, TGSI_OPCODE_RCP, TGSI_OPCODE_REP, TGSI_OPCODE_RET, TGSI_OPCODE_RFL, TGSI_OPCODE_ROUND, TGSI_OPCODE_RSQ, TGSI_OPCODE_SAD, TGSI_OPCODE_SCS, TGSI_OPCODE_SEQ, TGSI_OPCODE_SFL, TGSI_OPCODE_SGE, TGSI_OPCODE_SGT, TGSI_OPCODE_SHL, TGSI_OPCODE_SHR, TGSI_OPCODE_SIN, TGSI_OPCODE_SLE, TGSI_OPCODE_SLT, TGSI_OPCODE_SNE, TGSI_OPCODE_SSG, TGSI_OPCODE_STR, TGSI_OPCODE_SUB, TGSI_OPCODE_SWZ, TGSI_OPCODE_TEX, TGSI_OPCODE_TRUNC, TGSI_OPCODE_TXB, TGSI_OPCODE_TXD, TGSI_OPCODE_TXF, TGSI_OPCODE_TXL, TGSI_OPCODE_TXP, TGSI_OPCODE_TXQ, TGSI_OPCODE_UP2H, TGSI_OPCODE_UP2US, TGSI_OPCODE_UP4B, TGSI_OPCODE_UP4UB, TGSI_OPCODE_X2D, TGSI_OPCODE_XOR, TRUE, spu_exec_channel::u, and UPDATE_EXEC_MASK.

00896 {
00897    uint chan_index;
00898    union spu_exec_channel r[8];
00899 
00900    (*pc)++;
00901 
00902    switch (inst->Instruction.Opcode) {
00903    case TGSI_OPCODE_ARL:
00904       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
00905          FETCH( &r[0], 0, chan_index );
00906          r[0].q = si_cflts(r[0].q, 0);
00907          STORE( &r[0], 0, chan_index );
00908       }
00909       break;
00910 
00911    case TGSI_OPCODE_MOV:
00912    case TGSI_OPCODE_SWZ:
00913       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
00914          FETCH( &r[0], 0, chan_index );
00915          STORE( &r[0], 0, chan_index );
00916       }
00917       break;
00918 
00919    case TGSI_OPCODE_LIT:
00920       if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
00921          STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X );
00922       }
00923 
00924       if (IS_CHANNEL_ENABLED( *inst, CHAN_Y ) || IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
00925          FETCH( &r[0], 0, CHAN_X );
00926          if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
00927             r[0].q = micro_max(r[0].q, mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].q);
00928             STORE( &r[0], 0, CHAN_Y );
00929          }
00930 
00931          if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
00932             FETCH( &r[1], 0, CHAN_Y );
00933             r[1].q = micro_max(r[1].q, mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].q);
00934 
00935             FETCH( &r[2], 0, CHAN_W );
00936             r[2].q = micro_min(r[2].q, mach->Temps[TEMP_128_I].xyzw[TEMP_128_C].q);
00937             r[2].q = micro_max(r[2].q, mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C].q);
00938             r[1].q = micro_pow(r[1].q, r[2].q);
00939 
00940             /* r0 = (r0 > 0.0) ? r1 : 0.0
00941              */
00942             r[0].q = si_fcgt(r[0].q, mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].q);
00943             r[0].q = si_selb(mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].q, r[1].q,
00944                              r[0].q);
00945             STORE( &r[0], 0, CHAN_Z );
00946          }
00947       }
00948 
00949       if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
00950          STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
00951       }
00952       break;
00953 
00954    case TGSI_OPCODE_RCP:
00955    /* TGSI_OPCODE_RECIP */
00956       FETCH( &r[0], 0, CHAN_X );
00957       r[0].q = micro_div(mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].q, r[0].q);
00958       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
00959          STORE( &r[0], 0, chan_index );
00960       }
00961       break;
00962 
00963    case TGSI_OPCODE_RSQ:
00964    /* TGSI_OPCODE_RECIPSQRT */
00965       FETCH( &r[0], 0, CHAN_X );
00966       r[0].q = micro_sqrt(r[0].q);
00967       r[0].q = micro_div(mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].q, r[0].q);
00968       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
00969          STORE( &r[0], 0, chan_index );
00970       }
00971       break;
00972 
00973    case TGSI_OPCODE_EXP:
00974       ASSERT (0);
00975       break;
00976 
00977    case TGSI_OPCODE_LOG:
00978       ASSERT (0);
00979       break;
00980 
00981    case TGSI_OPCODE_MUL:
00982       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index )
00983       {
00984          FETCH(&r[0], 0, chan_index);
00985          FETCH(&r[1], 1, chan_index);
00986 
00987          r[0].q = si_fm(r[0].q, r[1].q);
00988 
00989          STORE(&r[0], 0, chan_index);
00990       }
00991       break;
00992 
00993    case TGSI_OPCODE_ADD:
00994       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
00995          FETCH( &r[0], 0, chan_index );
00996          FETCH( &r[1], 1, chan_index );
00997          r[0].q = si_fa(r[0].q, r[1].q);
00998          STORE( &r[0], 0, chan_index );
00999       }
01000       break;
01001 
01002    case TGSI_OPCODE_DP3:
01003    /* TGSI_OPCODE_DOT3 */
01004       FETCH( &r[0], 0, CHAN_X );
01005       FETCH( &r[1], 1, CHAN_X );
01006       r[0].q = si_fm(r[0].q, r[1].q);
01007 
01008       FETCH( &r[1], 0, CHAN_Y );
01009       FETCH( &r[2], 1, CHAN_Y );
01010       r[0].q = si_fma(r[1].q, r[2].q, r[0].q);
01011 
01012 
01013       FETCH( &r[1], 0, CHAN_Z );
01014       FETCH( &r[2], 1, CHAN_Z );
01015       r[0].q = si_fma(r[1].q, r[2].q, r[0].q);
01016 
01017       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
01018          STORE( &r[0], 0, chan_index );
01019       }
01020       break;
01021 
01022     case TGSI_OPCODE_DP4:
01023     /* TGSI_OPCODE_DOT4 */
01024        FETCH(&r[0], 0, CHAN_X);
01025        FETCH(&r[1], 1, CHAN_X);
01026 
01027       r[0].q = si_fm(r[0].q, r[1].q);
01028 
01029        FETCH(&r[1], 0, CHAN_Y);
01030        FETCH(&r[2], 1, CHAN_Y);
01031 
01032       r[0].q = si_fma(r[1].q, r[2].q, r[0].q);
01033 
01034        FETCH(&r[1], 0, CHAN_Z);
01035        FETCH(&r[2], 1, CHAN_Z);
01036 
01037       r[0].q = si_fma(r[1].q, r[2].q, r[0].q);
01038 
01039        FETCH(&r[1], 0, CHAN_W);
01040        FETCH(&r[2], 1, CHAN_W);
01041 
01042       r[0].q = si_fma(r[1].q, r[2].q, r[0].q);
01043 
01044       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
01045          STORE( &r[0], 0, chan_index );
01046       }
01047       break;
01048 
01049    case TGSI_OPCODE_DST:
01050       if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
01051          STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X );
01052       }
01053 
01054       if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
01055          FETCH( &r[0], 0, CHAN_Y );
01056          FETCH( &r[1], 1, CHAN_Y);
01057       r[0].q = si_fm(r[0].q, r[1].q);
01058          STORE( &r[0], 0, CHAN_Y );
01059       }
01060 
01061       if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
01062          FETCH( &r[0], 0, CHAN_Z );
01063          STORE( &r[0], 0, CHAN_Z );
01064       }
01065 
01066       if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
01067          FETCH( &r[0], 1, CHAN_W );
01068          STORE( &r[0], 0, CHAN_W );
01069       }
01070       break;
01071 
01072    case TGSI_OPCODE_MIN:
01073       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
01074          FETCH(&r[0], 0, chan_index);
01075          FETCH(&r[1], 1, chan_index);
01076 
01077          r[0].q = micro_min(r[0].q, r[1].q);
01078 
01079          STORE(&r[0], 0, chan_index);
01080       }
01081       break;
01082 
01083    case TGSI_OPCODE_MAX:
01084       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
01085          FETCH(&r[0], 0, chan_index);
01086          FETCH(&r[1], 1, chan_index);
01087 
01088          r[0].q = micro_max(r[0].q, r[1].q);
01089 
01090          STORE(&r[0], 0, chan_index );
01091       }
01092       break;
01093 
01094    case TGSI_OPCODE_SLT:
01095    /* TGSI_OPCODE_SETLT */
01096       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
01097          FETCH( &r[0], 0, chan_index );
01098          FETCH( &r[1], 1, chan_index );
01099 
01100          r[0].q = micro_ge(r[0].q, r[1].q);
01101          r[0].q = si_xori(r[0].q, 0xff);
01102 
01103          STORE( &r[0], 0, chan_index );
01104       }
01105       break;
01106 
01107    case TGSI_OPCODE_SGE:
01108    /* TGSI_OPCODE_SETGE */
01109       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
01110          FETCH( &r[0], 0, chan_index );
01111          FETCH( &r[1], 1, chan_index );
01112          r[0].q = micro_ge(r[0].q, r[1].q);
01113          STORE( &r[0], 0, chan_index );
01114       }
01115       break;
01116 
01117    case TGSI_OPCODE_MAD:
01118    /* TGSI_OPCODE_MADD */
01119       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
01120          FETCH( &r[0], 0, chan_index );
01121          FETCH( &r[1], 1, chan_index );
01122          FETCH( &r[2], 2, chan_index );
01123          r[0].q = si_fma(r[0].q, r[1].q, r[2].q);
01124          STORE( &r[0], 0, chan_index );
01125       }
01126       break;
01127 
01128    case TGSI_OPCODE_SUB:
01129       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
01130          FETCH(&r[0], 0, chan_index);
01131          FETCH(&r[1], 1, chan_index);
01132 
01133          r[0].q = si_fs(r[0].q, r[1].q);
01134 
01135          STORE(&r[0], 0, chan_index);
01136       }
01137       break;
01138 
01139    case TGSI_OPCODE_LERP:
01140    /* TGSI_OPCODE_LRP */
01141       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
01142          FETCH(&r[0], 0, chan_index);
01143          FETCH(&r[1], 1, chan_index);
01144          FETCH(&r[2], 2, chan_index);
01145 
01146          r[1].q = si_fs(r[1].q, r[2].q);
01147          r[0].q = si_fma(r[0].q, r[1].q, r[2].q);
01148 
01149          STORE(&r[0], 0, chan_index);
01150       }
01151       break;
01152 
01153    case TGSI_OPCODE_CND:
01154       ASSERT (0);
01155       break;
01156 
01157    case TGSI_OPCODE_CND0:
01158       ASSERT (0);
01159       break;
01160 
01161    case TGSI_OPCODE_DOT2ADD:
01162       /* TGSI_OPCODE_DP2A */
01163       ASSERT (0);
01164       break;
01165 
01166    case TGSI_OPCODE_INDEX:
01167       ASSERT (0);
01168       break;
01169 
01170    case TGSI_OPCODE_NEGATE:
01171       ASSERT (0);
01172       break;
01173 
01174    case TGSI_OPCODE_FRAC:
01175    /* TGSI_OPCODE_FRC */
01176       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
01177          FETCH( &r[0], 0, chan_index );
01178          r[0].q = micro_frc(r[0].q);
01179          STORE( &r[0], 0, chan_index );
01180       }
01181       break;
01182 
01183    case TGSI_OPCODE_CLAMP:
01184       ASSERT (0);
01185       break;
01186 
01187    case TGSI_OPCODE_FLOOR:
01188    /* TGSI_OPCODE_FLR */
01189       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
01190          FETCH( &r[0], 0, chan_index );
01191          r[0].q = micro_flr(r[0].q);
01192          STORE( &r[0], 0, chan_index );
01193       }
01194       break;
01195 
01196    case TGSI_OPCODE_ROUND:
01197       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
01198          FETCH( &r[0], 0, chan_index );
01199          r[0].q = micro_rnd(r[0].q);
01200          STORE( &r[0], 0, chan_index );
01201       }
01202       break;
01203 
01204    case TGSI_OPCODE_EXPBASE2:
01205     /* TGSI_OPCODE_EX2 */
01206       FETCH(&r[0], 0, CHAN_X);
01207 
01208       r[0].q = micro_pow(mach->Temps[TEMP_2_I].xyzw[TEMP_2_C].q, r[0].q);
01209 
01210       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
01211          STORE( &r[0], 0, chan_index );
01212       }
01213       break;
01214 
01215    case TGSI_OPCODE_LOGBASE2:
01216    /* TGSI_OPCODE_LG2 */
01217       FETCH( &r[0], 0, CHAN_X );
01218       r[0].q = micro_lg2(r[0].q);
01219       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
01220          STORE( &r[0], 0, chan_index );
01221       }
01222       break;
01223 
01224    case TGSI_OPCODE_POWER:
01225       /* TGSI_OPCODE_POW */
01226       FETCH(&r[0], 0, CHAN_X);
01227       FETCH(&r[1], 1, CHAN_X);
01228 
01229       r[0].q = micro_pow(r[0].q, r[1].q);
01230 
01231       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
01232          STORE( &r[0], 0, chan_index );
01233       }
01234       break;
01235 
01236    case TGSI_OPCODE_CROSSPRODUCT:
01237       /* TGSI_OPCODE_XPD */
01238       FETCH(&r[0], 0, CHAN_Y);
01239       FETCH(&r[1], 1, CHAN_Z);
01240       FETCH(&r[3], 0, CHAN_Z);
01241       FETCH(&r[4], 1, CHAN_Y);
01242 
01243       /* r2 = (r0 * r1) - (r3 * r5)
01244        */
01245       r[2].q = si_fm(r[3].q, r[5].q);
01246       r[2].q = si_fms(r[0].q, r[1].q, r[2].q);
01247 
01248       if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
01249          STORE( &r[2], 0, CHAN_X );
01250       }
01251 
01252       FETCH(&r[2], 1, CHAN_X);
01253       FETCH(&r[5], 0, CHAN_X);
01254 
01255       /* r3 = (r3 * r2) - (r1 * r5)
01256        */
01257       r[1].q = si_fm(r[1].q, r[5].q);
01258       r[3].q = si_fms(r[3].q, r[2].q, r[1].q);
01259 
01260       if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
01261          STORE( &r[3], 0, CHAN_Y );
01262       }
01263 
01264       /* r5 = (r5 * r4) - (r0 * r2)
01265        */
01266       r[0].q = si_fm(r[0].q, r[2].q);
01267       r[5].q = si_fms(r[5].q, r[4].q, r[0].q);
01268 
01269       if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
01270          STORE( &r[5], 0, CHAN_Z );
01271       }
01272 
01273       if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
01274          STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
01275       }
01276       break;
01277 
01278     case TGSI_OPCODE_MULTIPLYMATRIX:
01279        ASSERT (0);
01280        break;
01281 
01282     case TGSI_OPCODE_ABS:
01283        FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
01284           FETCH(&r[0], 0, chan_index);
01285 
01286           r[0].q = micro_abs(r[0].q);
01287 
01288           STORE(&r[0], 0, chan_index);
01289        }
01290        break;
01291 
01292    case TGSI_OPCODE_RCC:
01293       ASSERT (0);
01294       break;
01295 
01296    case TGSI_OPCODE_DPH:
01297       FETCH(&r[0], 0, CHAN_X);
01298       FETCH(&r[1], 1, CHAN_X);
01299 
01300       r[0].q = si_fm(r[0].q, r[1].q);
01301 
01302       FETCH(&r[1], 0, CHAN_Y);
01303       FETCH(&r[2], 1, CHAN_Y);
01304 
01305       r[0].q = si_fma(r[1].q, r[2].q, r[0].q);
01306 
01307       FETCH(&r[1], 0, CHAN_Z);
01308       FETCH(&r[2], 1, CHAN_Z);
01309 
01310       r[0].q = si_fma(r[1].q, r[2].q, r[0].q);
01311 
01312       FETCH(&r[1], 1, CHAN_W);
01313 
01314       r[0].q = si_fa(r[0].q, r[1].q);
01315 
01316       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
01317          STORE( &r[0], 0, chan_index );
01318       }
01319       break;
01320 
01321    case TGSI_OPCODE_COS:
01322       FETCH(&r[0], 0, CHAN_X);
01323 
01324       r[0].q = micro_cos(r[0].q);
01325 
01326       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
01327          STORE( &r[0], 0, chan_index );
01328       }
01329       break;
01330 
01331    case TGSI_OPCODE_DDX:
01332       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
01333          FETCH( &r[0], 0, chan_index );
01334          r[0].q = micro_ddx(r[0].q);
01335          STORE( &r[0], 0, chan_index );
01336       }
01337       break;
01338 
01339    case TGSI_OPCODE_DDY:
01340       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
01341          FETCH( &r[0], 0, chan_index );
01342          r[0].q = micro_ddy(r[0].q);
01343          STORE( &r[0], 0, chan_index );
01344       }
01345       break;
01346 
01347    case TGSI_OPCODE_KILP:
01348       exec_kilp (mach, inst);
01349       break;
01350 
01351    case TGSI_OPCODE_KIL:
01352       exec_kil (mach, inst);
01353       break;
01354 
01355    case TGSI_OPCODE_PK2H:
01356       ASSERT (0);
01357       break;
01358 
01359    case TGSI_OPCODE_PK2US:
01360       ASSERT (0);
01361       break;
01362 
01363    case TGSI_OPCODE_PK4B:
01364       ASSERT (0);
01365       break;
01366 
01367    case TGSI_OPCODE_PK4UB:
01368       ASSERT (0);
01369       break;
01370 
01371    case TGSI_OPCODE_RFL:
01372       ASSERT (0);
01373       break;
01374 
01375    case TGSI_OPCODE_SEQ:
01376       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
01377          FETCH( &r[0], 0, chan_index );
01378          FETCH( &r[1], 1, chan_index );
01379 
01380          r[0].q = si_fceq(r[0].q, r[1].q);
01381 
01382          STORE( &r[0], 0, chan_index );
01383       }
01384       break;
01385 
01386    case TGSI_OPCODE_SFL:
01387       ASSERT (0);
01388       break;
01389 
01390    case TGSI_OPCODE_SGT:
01391       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
01392          FETCH( &r[0], 0, chan_index );
01393          FETCH( &r[1], 1, chan_index );
01394          r[0].q = si_fcgt(r[0].q, r[1].q);
01395          STORE( &r[0], 0, chan_index );
01396       }
01397       break;
01398 
01399    case TGSI_OPCODE_SIN:
01400       FETCH( &r[0], 0, CHAN_X );
01401       r[0].q = micro_sin(r[0].q);
01402       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
01403          STORE( &r[0], 0, chan_index );
01404       }
01405       break;
01406 
01407    case TGSI_OPCODE_SLE:
01408       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
01409          FETCH( &r[0], 0, chan_index );
01410          FETCH( &r[1], 1, chan_index );
01411 
01412          r[0].q = si_fcgt(r[0].q, r[1].q);
01413          r[0].q = si_xori(r[0].q, 0xff);
01414 
01415          STORE( &r[0], 0, chan_index );
01416       }
01417       break;
01418 
01419    case TGSI_OPCODE_SNE:
01420       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
01421          FETCH( &r[0], 0, chan_index );
01422          FETCH( &r[1], 1, chan_index );
01423 
01424          r[0].q = si_fceq(r[0].q, r[1].q);
01425          r[0].q = si_xori(r[0].q, 0xff);
01426 
01427          STORE( &r[0], 0, chan_index );
01428       }
01429       break;
01430 
01431    case TGSI_OPCODE_STR:
01432       ASSERT (0);
01433       break;
01434 
01435    case TGSI_OPCODE_TEX:
01436       /* simple texture lookup */
01437       /* src[0] = texcoord */
01438       /* src[1] = sampler unit */
01439       exec_tex(mach, inst, FALSE, FALSE);
01440       break;
01441 
01442    case TGSI_OPCODE_TXB:
01443       /* Texture lookup with lod bias */
01444       /* src[0] = texcoord (src[0].w = load bias) */
01445       /* src[1] = sampler unit */
01446       exec_tex(mach, inst, TRUE, FALSE);
01447       break;
01448 
01449    case TGSI_OPCODE_TXD:
01450       /* Texture lookup with explict partial derivatives */
01451       /* src[0] = texcoord */
01452       /* src[1] = d[strq]/dx */
01453       /* src[2] = d[strq]/dy */
01454       /* src[3] = sampler unit */
01455       ASSERT (0);
01456       break;
01457 
01458    case TGSI_OPCODE_TXL:
01459       /* Texture lookup with explit LOD */
01460       /* src[0] = texcoord (src[0].w = load bias) */
01461       /* src[1] = sampler unit */
01462       exec_tex(mach, inst, TRUE, FALSE);
01463       break;
01464 
01465    case TGSI_OPCODE_TXP:
01466       /* Texture lookup with projection */
01467       /* src[0] = texcoord (src[0].w = projection) */
01468       /* src[1] = sampler unit */
01469       exec_tex(mach, inst, TRUE, TRUE);
01470       break;
01471 
01472    case TGSI_OPCODE_UP2H:
01473       ASSERT (0);
01474       break;
01475 
01476    case TGSI_OPCODE_UP2US:
01477       ASSERT (0);
01478       break;
01479 
01480    case TGSI_OPCODE_UP4B:
01481       ASSERT (0);
01482       break;
01483 
01484    case TGSI_OPCODE_UP4UB:
01485       ASSERT (0);
01486       break;
01487 
01488    case TGSI_OPCODE_X2D:
01489       ASSERT (0);
01490       break;
01491 
01492    case TGSI_OPCODE_ARA:
01493       ASSERT (0);
01494       break;
01495 
01496    case TGSI_OPCODE_ARR:
01497       ASSERT (0);
01498       break;
01499 
01500    case TGSI_OPCODE_BRA:
01501       ASSERT (0);
01502       break;
01503 
01504    case TGSI_OPCODE_CAL:
01505       /* skip the call if no execution channels are enabled */
01506       if (mach->ExecMask) {
01507          /* do the call */
01508 
01509          /* push the Cond, Loop, Cont stacks */
01510          ASSERT(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
01511          mach->CondStack[mach->CondStackTop++] = mach->CondMask;
01512          ASSERT(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
01513          mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
01514          ASSERT(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
01515          mach->ContStack[mach->ContStackTop++] = mach->ContMask;
01516 
01517          ASSERT(mach->FuncStackTop < TGSI_EXEC_MAX_CALL_NESTING);
01518          mach->FuncStack[mach->FuncStackTop++] = mach->FuncMask;
01519 
01520          /* note that PC was already incremented above */
01521          mach->CallStack[mach->CallStackTop++] = *pc;
01522          *pc = inst->InstructionExtLabel.Label;
01523       }
01524       break;
01525 
01526    case TGSI_OPCODE_RET:
01527       mach->FuncMask &= ~mach->ExecMask;
01528       UPDATE_EXEC_MASK(mach);
01529 
01530       if (mach->ExecMask == 0x0) {
01531          /* really return now (otherwise, keep executing */
01532 
01533          if (mach->CallStackTop == 0) {
01534             /* returning from main() */
01535             *pc = -1;
01536             return;
01537          }
01538          *pc = mach->CallStack[--mach->CallStackTop];
01539 
01540          /* pop the Cond, Loop, Cont stacks */
01541          ASSERT(mach->CondStackTop > 0);
01542          mach->CondMask = mach->CondStack[--mach->CondStackTop];
01543          ASSERT(mach->LoopStackTop > 0);
01544          mach->LoopMask = mach->LoopStack[--mach->LoopStackTop];
01545          ASSERT(mach->ContStackTop > 0);
01546          mach->ContMask = mach->ContStack[--mach->ContStackTop];
01547          ASSERT(mach->FuncStackTop > 0);
01548          mach->FuncMask = mach->FuncStack[--mach->FuncStackTop];
01549 
01550          UPDATE_EXEC_MASK(mach);
01551       }
01552       break;
01553 
01554    case TGSI_OPCODE_SSG:
01555       ASSERT (0);
01556       break;
01557 
01558    case TGSI_OPCODE_CMP:
01559       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
01560          FETCH(&r[0], 0, chan_index);
01561          FETCH(&r[1], 1, chan_index);
01562          FETCH(&r[2], 2, chan_index);
01563 
01564          /* r0 = (r0 < 0.0) ? r1 : r2
01565           */
01566          r[3].q = si_xor(r[3].q, r[3].q);
01567          r[0].q = micro_lt(r[0].q, r[3].q);
01568          r[0].q = si_selb(r[1].q, r[2].q, r[0].q);
01569 
01570          STORE(&r[0], 0, chan_index);
01571       }
01572       break;
01573 
01574    case TGSI_OPCODE_SCS:
01575       if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) || IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) {
01576          FETCH( &r[0], 0, CHAN_X );
01577       }
01578       if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) ) {
01579          r[1].q = micro_cos(r[0].q);
01580          STORE( &r[1], 0, CHAN_X );
01581       }
01582       if( IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) {
01583          r[1].q = micro_sin(r[0].q);
01584          STORE( &r[1], 0, CHAN_Y );
01585       }
01586       if( IS_CHANNEL_ENABLED( *inst, CHAN_Z ) ) {
01587          STORE( &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], 0, CHAN_Z );
01588       }
01589       if( IS_CHANNEL_ENABLED( *inst, CHAN_W ) ) {
01590          STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
01591       }
01592       break;
01593 
01594    case TGSI_OPCODE_NRM:
01595       ASSERT (0);
01596       break;
01597 
01598    case TGSI_OPCODE_DIV:
01599       ASSERT( 0 );
01600       break;
01601 
01602    case TGSI_OPCODE_DP2:
01603       FETCH( &r[0], 0, CHAN_X );
01604       FETCH( &r[1], 1, CHAN_X );
01605       r[0].q = si_fm(r[0].q, r[1].q);
01606 
01607       FETCH( &r[1], 0, CHAN_Y );
01608       FETCH( &r[2], 1, CHAN_Y );
01609       r[0].q = si_fma(r[1].q, r[2].q, r[0].q);
01610 
01611       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
01612          STORE( &r[0], 0, chan_index );
01613       }
01614       break;
01615 
01616    case TGSI_OPCODE_IF:
01617       /* push CondMask */
01618       ASSERT(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
01619       mach->CondStack[mach->CondStackTop++] = mach->CondMask;
01620       FETCH( &r[0], 0, CHAN_X );
01621       /* update CondMask */
01622       if( ! r[0].u[0] ) {
01623          mach->CondMask &= ~0x1;
01624       }
01625       if( ! r[0].u[1] ) {
01626          mach->CondMask &= ~0x2;
01627       }
01628       if( ! r[0].u[2] ) {
01629          mach->CondMask &= ~0x4;
01630       }
01631       if( ! r[0].u[3] ) {
01632          mach->CondMask &= ~0x8;
01633       }
01634       UPDATE_EXEC_MASK(mach);
01635       /* Todo: If CondMask==0, jump to ELSE */
01636       break;
01637 
01638    case TGSI_OPCODE_ELSE:
01639       /* invert CondMask wrt previous mask */
01640       {
01641          uint prevMask;
01642          ASSERT(mach->CondStackTop > 0);
01643          prevMask = mach->CondStack[mach->CondStackTop - 1];
01644          mach->CondMask = ~mach->CondMask & prevMask;
01645          UPDATE_EXEC_MASK(mach);
01646          /* Todo: If CondMask==0, jump to ENDIF */
01647       }
01648       break;
01649 
01650    case TGSI_OPCODE_ENDIF:
01651       /* pop CondMask */
01652       ASSERT(mach->CondStackTop > 0);
01653       mach->CondMask = mach->CondStack[--mach->CondStackTop];
01654       UPDATE_EXEC_MASK(mach);
01655       break;
01656 
01657    case TGSI_OPCODE_END:
01658       /* halt execution */
01659       *pc = -1;
01660       break;
01661 
01662    case TGSI_OPCODE_REP:
01663       ASSERT (0);
01664       break;
01665 
01666    case TGSI_OPCODE_ENDREP:
01667        ASSERT (0);
01668        break;
01669 
01670    case TGSI_OPCODE_PUSHA:
01671       ASSERT (0);
01672       break;
01673 
01674    case TGSI_OPCODE_POPA:
01675       ASSERT (0);
01676       break;
01677 
01678    case TGSI_OPCODE_CEIL:
01679       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
01680          FETCH( &r[0], 0, chan_index );
01681          r[0].q = micro_ceil(r[0].q);
01682          STORE( &r[0], 0, chan_index );
01683       }
01684       break;
01685 
01686    case TGSI_OPCODE_I2F:
01687       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
01688          FETCH( &r[0], 0, chan_index );
01689          r[0].q = si_csflt(r[0].q, 0);
01690          STORE( &r[0], 0, chan_index );
01691       }
01692       break;
01693 
01694    case TGSI_OPCODE_NOT:
01695       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
01696          FETCH( &r[0], 0, chan_index );
01697          r[0].q = si_xorbi(r[0].q, 0xff);
01698          STORE( &r[0], 0, chan_index );
01699       }
01700       break;
01701 
01702    case TGSI_OPCODE_TRUNC:
01703       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
01704          FETCH( &r[0], 0, chan_index );
01705          r[0].q = micro_trunc(r[0].q);
01706          STORE( &r[0], 0, chan_index );
01707       }
01708       break;
01709 
01710    case TGSI_OPCODE_SHL:
01711       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
01712          FETCH( &r[0], 0, chan_index );
01713          FETCH( &r[1], 1, chan_index );
01714 
01715          r[0].q = si_shl(r[0].q, r[1].q);
01716 
01717          STORE( &r[0], 0, chan_index );
01718       }
01719       break;
01720 
01721    case TGSI_OPCODE_SHR:
01722       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
01723          FETCH( &r[0], 0, chan_index );
01724          FETCH( &r[1], 1, chan_index );
01725          r[0].q = micro_ishr(r[0].q, r[1].q);
01726          STORE( &r[0], 0, chan_index );
01727       }
01728       break;
01729 
01730    case TGSI_OPCODE_AND:
01731       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
01732          FETCH( &r[0], 0, chan_index );
01733          FETCH( &r[1], 1, chan_index );
01734          r[0].q = si_and(r[0].q, r[1].q);
01735          STORE( &r[0], 0, chan_index );
01736       }
01737       break;
01738 
01739    case TGSI_OPCODE_OR:
01740       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
01741          FETCH( &r[0], 0, chan_index );
01742          FETCH( &r[1], 1, chan_index );
01743          r[0].q = si_or(r[0].q, r[1].q);
01744          STORE( &r[0], 0, chan_index );
01745       }
01746       break;
01747 
01748    case TGSI_OPCODE_MOD:
01749       ASSERT (0);
01750       break;
01751 
01752    case TGSI_OPCODE_XOR:
01753       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
01754          FETCH( &r[0], 0, chan_index );
01755          FETCH( &r[1], 1, chan_index );
01756          r[0].q = si_xor(r[0].q, r[1].q);
01757          STORE( &r[0], 0, chan_index );
01758       }
01759       break;
01760 
01761    case TGSI_OPCODE_SAD:
01762       ASSERT (0);
01763       break;
01764 
01765    case TGSI_OPCODE_TXF:
01766       ASSERT (0);
01767       break;
01768 
01769    case TGSI_OPCODE_TXQ:
01770       ASSERT (0);
01771       break;
01772 
01773    case TGSI_OPCODE_EMIT:
01774       mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] += 16;
01775       mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]]++;
01776       break;
01777 
01778    case TGSI_OPCODE_ENDPRIM:
01779       mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]++;
01780       mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]] = 0;
01781       break;
01782 
01783    case TGSI_OPCODE_LOOP:
01784       /* fall-through (for now) */
01785    case TGSI_OPCODE_BGNLOOP2:
01786       /* push LoopMask and ContMasks */
01787       ASSERT(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
01788       mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
01789       ASSERT(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
01790       mach->ContStack[mach->ContStackTop++] = mach->ContMask;
01791       break;
01792 
01793    case TGSI_OPCODE_ENDLOOP:
01794       /* fall-through (for now at least) */
01795    case TGSI_OPCODE_ENDLOOP2:
01796       /* Restore ContMask, but don't pop */
01797       ASSERT(mach->ContStackTop > 0);
01798       mach->ContMask = mach->ContStack[mach->ContStackTop - 1];
01799       if (mach->LoopMask) {
01800          /* repeat loop: jump to instruction just past BGNLOOP */
01801          *pc = inst->InstructionExtLabel.Label + 1;
01802       }
01803       else {
01804          /* exit loop: pop LoopMask */
01805          ASSERT(mach->LoopStackTop > 0);
01806          mach->LoopMask = mach->LoopStack[--mach->LoopStackTop];
01807          /* pop ContMask */
01808          ASSERT(mach->ContStackTop > 0);
01809          mach->ContMask = mach->ContStack[--mach->ContStackTop];
01810       }
01811       UPDATE_EXEC_MASK(mach);
01812       break;
01813 
01814    case TGSI_OPCODE_BRK:
01815       /* turn off loop channels for each enabled exec channel */
01816       mach->LoopMask &= ~mach->ExecMask;
01817       /* Todo: if mach->LoopMask == 0, jump to end of loop */
01818       UPDATE_EXEC_MASK(mach);
01819       break;
01820 
01821    case TGSI_OPCODE_CONT:
01822       /* turn off cont channels for each enabled exec channel */
01823       mach->ContMask &= ~mach->ExecMask;
01824       /* Todo: if mach->LoopMask == 0, jump to end of loop */
01825       UPDATE_EXEC_MASK(mach);
01826       break;
01827 
01828    case TGSI_OPCODE_BGNSUB:
01829       /* no-op */
01830       break;
01831 
01832    case TGSI_OPCODE_ENDSUB:
01833       /* no-op */
01834       break;
01835 
01836    case TGSI_OPCODE_NOISE1:
01837       ASSERT( 0 );
01838       break;
01839 
01840    case TGSI_OPCODE_NOISE2:
01841       ASSERT( 0 );
01842       break;
01843 
01844    case TGSI_OPCODE_NOISE3:
01845       ASSERT( 0 );
01846       break;
01847 
01848    case TGSI_OPCODE_NOISE4:
01849       ASSERT( 0 );
01850       break;
01851 
01852    case TGSI_OPCODE_NOP:
01853       break;
01854 
01855    default:
01856       ASSERT( 0 );
01857    }
01858 }

static void exec_kil ( struct spu_exec_machine * mach,
const struct tgsi_full_instruction * inst 
) [static]

Execute ARB-style KIL which is predicated by a src register.

Kill fragment if any of the four values is less than zero.

Definition at line 605 of file spu_exec.c.

References spu_exec_channel::f, FETCH, tgsi_full_instruction::FullSrcRegisters, spu_exec_channel::i, swizzle(), TEMP_KILMASK_C, TEMP_KILMASK_I, TGSI_EXTSWIZZLE_ONE, TGSI_EXTSWIZZLE_ZERO, and tgsi_util_get_full_src_register_extswizzle().

00607 { /* Build a per-pixel kill mask from source register 0 and OR it into the KILMASK temp. */
00608    uint uniquemask;
00609    uint chan_index;
00610    uint kilmask = 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */
00611    union spu_exec_channel r[1];
00612 
00613    /* This mask stores component bits that were already tested. Note that
00614     * we test if the value is less than zero, so 1.0 and 0.0 need not be
00615     * tested. */
00616    uniquemask = (1 << TGSI_EXTSWIZZLE_ZERO) | (1 << TGSI_EXTSWIZZLE_ONE);
00617 
00618    for (chan_index = 0; chan_index < 4; chan_index++)
00619    {
00620       uint swizzle;
00621       uint i;
00622 
00623       /* unswizzle channel */
00624       swizzle = tgsi_util_get_full_src_register_extswizzle (
00625                         &inst->FullSrcRegisters[0],
00626                         chan_index);
00627 
00628       /* check if the component has not been already tested */
00629       if (uniquemask & (1 << swizzle))
00630          continue;
00631       uniquemask |= 1 << swizzle;
00632 
00633       FETCH(&r[0], 0, chan_index);
00634       for (i = 0; i < 4; i++) /* element i of the fetched channel maps to kilmask bit i */
00635          if (r[0].f[i] < 0.0f)
00636             kilmask |= 1 << i; /* negative value: flag pixel i */
00637    }
00638 
00639    mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask; /* OR into the stored mask; bits already set there are kept */
00640 }

static void exec_kilp ( struct tgsi_exec_machine * mach,
const struct tgsi_full_instruction * inst 
) [static]

Execute NVIDIA-style KIL which is predicated by a condition code.

Kill fragment if the condition code is TRUE.

Definition at line 647 of file spu_exec.c.

References TEMP_KILMASK_C, TEMP_KILMASK_I, tgsi_exec_machine::Temps, tgsi_exec_channel::u, and tgsi_exec_vector::xyzw.

00649 { /* Placeholder for condition-code KIL: ORs a kill mask into the KILMASK temp, but the mask is never filled in. */
00650    uint kilmask = 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */
00651 
00652    /* TODO: build kilmask from CC mask */
00653 
00654    mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask; /* kilmask is still 0 here, so the stored mask is left unchanged; inst is not read */
00655 }

static void exec_tex ( struct spu_exec_machine * mach,
const struct tgsi_full_instruction * inst,
boolean  biasLod,
boolean  projected 
) [static]

Definition at line 686 of file spu_exec.c.

References ASSERT, CHAN_W, CHAN_X, CHAN_Y, CHAN_Z, spu_exec_channel::f, FETCH, fetch_texel(), FOR_EACH_ENABLED_CHANNEL, tgsi_full_instruction::FullSrcRegisters, tgsi_src_register::Index, tgsi_full_instruction::InstructionExtTexture, micro_div(), spu_exec_channel::q, spu_exec_machine::Samplers, tgsi_full_src_register::SrcRegister, STORE, tgsi_instruction_ext_texture::Texture, TGSI_TEXTURE_1D, TGSI_TEXTURE_2D, TGSI_TEXTURE_3D, TGSI_TEXTURE_CUBE, and TGSI_TEXTURE_RECT.

00689 { /* Texture lookup: fetch the coordinate from src 0, optionally divide by W and/or take a LOD bias from W, sample, then store the enabled channels to dst 0. */
00690    const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index; /* src 1 names the sampler unit */
00691    union spu_exec_channel r[8];
00692    uint chan_index;
00693    float lodBias;
00694 
00695    /*   printf("Sampler %u unit %u\n", sampler, unit); */
00696 
00697    switch (inst->InstructionExtTexture.Texture) {
00698    case TGSI_TEXTURE_1D:
00699 
00700       FETCH(&r[0], 0, CHAN_X);
00701 
00702       if (projected) {
00703          FETCH(&r[1], 0, CHAN_W);
00704          r[0].q = micro_div(r[0].q, r[1].q); /* s / w */
00705       }
00706 
00707       if (biasLod) {
00708          FETCH(&r[1], 0, CHAN_W);
00709          lodBias = r[1].f[0]; /* read the W just fetched into r[1]; r[2] is never written on this path */
00710       }
00711       else
00712          lodBias = 0.0;
00713 
00714       fetch_texel(&mach->Samplers[unit],
00715                   &r[0], NULL, NULL, lodBias,  /* S, T, P, BIAS */
00716                   &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */
00717       break;
00718 
00719    case TGSI_TEXTURE_2D:
00720    case TGSI_TEXTURE_RECT:
00721 
00722       FETCH(&r[0], 0, CHAN_X);
00723       FETCH(&r[1], 0, CHAN_Y);
00724       FETCH(&r[2], 0, CHAN_Z);
00725 
00726       if (projected) {
00727          FETCH(&r[3], 0, CHAN_W);
00728          r[0].q = micro_div(r[0].q, r[3].q);
00729          r[1].q = micro_div(r[1].q, r[3].q);
00730          r[2].q = micro_div(r[2].q, r[3].q);
00731       }
00732 
00733       if (biasLod) {
00734          FETCH(&r[3], 0, CHAN_W);
00735          lodBias = r[3].f[0]; /* only element 0 of W is used; the same bias is passed for the whole fetch */
00736       }
00737       else
00738          lodBias = 0.0;
00739 
00740       fetch_texel(&mach->Samplers[unit],
00741                   &r[0], &r[1], &r[2], lodBias,  /* inputs */
00742                   &r[0], &r[1], &r[2], &r[3]);  /* outputs */
00743       break;
00744 
00745    case TGSI_TEXTURE_3D:
00746    case TGSI_TEXTURE_CUBE:
00747 
00748       FETCH(&r[0], 0, CHAN_X);
00749       FETCH(&r[1], 0, CHAN_Y);
00750       FETCH(&r[2], 0, CHAN_Z);
00751 
00752       if (projected) {
00753          FETCH(&r[3], 0, CHAN_W);
00754          r[0].q = micro_div(r[0].q, r[3].q);
00755          r[1].q = micro_div(r[1].q, r[3].q);
00756          r[2].q = micro_div(r[2].q, r[3].q);
00757       }
00758 
00759       if (biasLod) {
00760          FETCH(&r[3], 0, CHAN_W);
00761          lodBias = r[3].f[0]; /* only element 0 of W is used; the same bias is passed for the whole fetch */
00762       }
00763       else
00764          lodBias = 0.0;
00765 
00766       fetch_texel(&mach->Samplers[unit],
00767                   &r[0], &r[1], &r[2], lodBias,
00768                   &r[0], &r[1], &r[2], &r[3]);
00769       break;
00770 
00771    default:
00772       ASSERT (0);
00773    }
00774 
00775    FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
00776       STORE( &r[chan_index], 0, chan_index ); /* r[0..3] hold R, G, B, A after fetch_texel */
00777    }
00778 }

static void fetch_source ( const struct spu_exec_machine * mach,
union spu_exec_channel * chan,
const struct tgsi_full_src_register * reg,
const uint  chan_index 
) [static]

Definition at line 430 of file spu_exec.c.

References ASSERT, CHAN_X, tgsi_src_register_ext_mod::Complement, tgsi_src_register::Dimension, fetch_src_file_channel(), tgsi_src_register::File, spu_exec_channel::i, tgsi_dimension::Index, tgsi_src_register::Index, tgsi_dimension::Indirect, tgsi_src_register::Indirect, micro_abs(), micro_neg(), micro_set_sign(), spu_exec_channel::q, tgsi_full_src_register::SrcRegister, tgsi_full_src_register::SrcRegisterDim, tgsi_full_src_register::SrcRegisterDimInd, tgsi_full_src_register::SrcRegisterExtMod, tgsi_full_src_register::SrcRegisterInd, swizzle(), TEMP_1_C, TEMP_1_I, TGSI_FILE_CONSTANT, TGSI_FILE_INPUT, tgsi_util_get_full_src_register_extswizzle(), tgsi_util_get_full_src_register_sign_mode(), tgsi_util_get_src_register_swizzle(), TGSI_UTIL_SIGN_CLEAR, TGSI_UTIL_SIGN_KEEP, TGSI_UTIL_SIGN_SET, and TGSI_UTIL_SIGN_TOGGLE.

00435 { /* Fetch channel chan_index of source register reg into *chan: resolve indirect and 2D indices, apply the swizzle, then the sign mode and complement modifiers. */
00436    union spu_exec_channel index;
00437    uint swizzle;
00438 
00439    index.i[0] = /* replicate the register index into all four elements */
00440    index.i[1] =
00441    index.i[2] =
00442    index.i[3] = reg->SrcRegister.Index;
00443 
00444    if (reg->SrcRegister.Indirect) {
00445       union spu_exec_channel index2;
00446       union spu_exec_channel indir_index;
00447 
00448       index2.i[0] =
00449       index2.i[1] =
00450       index2.i[2] =
00451       index2.i[3] = reg->SrcRegisterInd.Index;
00452 
00453       swizzle = tgsi_util_get_src_register_swizzle(&reg->SrcRegisterInd,
00454                                                    CHAN_X); /* the swizzle is always queried for channel X of the indirect register */
00455       fetch_src_file_channel(
00456          mach,
00457          reg->SrcRegisterInd.File,
00458          swizzle,
00459          &index2,
00460          &indir_index );
00461 
00462       index.q = si_a(index.q, indir_index.q); /* add the fetched offsets to the base index, element by element */
00463    }
00464 
00465    if( reg->SrcRegister.Dimension ) {
00466       switch( reg->SrcRegister.File ) {
00467       case TGSI_FILE_INPUT:
00468          index.q = si_mpyi(index.q, 17); /* NOTE(review): stride 17 is a magic constant; its origin is not visible here */
00469          break;
00470       case TGSI_FILE_CONSTANT:
00471          index.q = si_shli(index.q, 12); /* NOTE(review): shift by 12 (x4096) is a magic constant; its origin is not visible here */
00472          break;
00473       default:
00474          ASSERT( 0 );
00475       }
00476 
00477       index.i[0] += reg->SrcRegisterDim.Index;
00478       index.i[1] += reg->SrcRegisterDim.Index;
00479       index.i[2] += reg->SrcRegisterDim.Index;
00480       index.i[3] += reg->SrcRegisterDim.Index;
00481 
00482       if (reg->SrcRegisterDim.Indirect) {
00483          union spu_exec_channel index2;
00484          union spu_exec_channel indir_index;
00485 
00486          index2.i[0] =
00487          index2.i[1] =
00488          index2.i[2] =
00489          index2.i[3] = reg->SrcRegisterDimInd.Index;
00490 
00491          swizzle = tgsi_util_get_src_register_swizzle( &reg->SrcRegisterDimInd, CHAN_X );
00492          fetch_src_file_channel(
00493             mach,
00494             reg->SrcRegisterDimInd.File,
00495             swizzle,
00496             &index2,
00497             &indir_index );
00498 
00499          index.q = si_a(index.q, indir_index.q); /* same indirect adjustment, applied to the dimension index */
00500       }
00501    }
00502 
00503    swizzle = tgsi_util_get_full_src_register_extswizzle( reg, chan_index );
00504    fetch_src_file_channel( /* the actual read of the requested channel, using the final indices */
00505       mach,
00506       reg->SrcRegister.File,
00507       swizzle,
00508       &index,
00509       chan );
00510 
00511    switch (tgsi_util_get_full_src_register_sign_mode( reg, chan_index )) {
00512    case TGSI_UTIL_SIGN_CLEAR:
00513       chan->q = micro_abs(chan->q);
00514       break;
00515 
00516    case TGSI_UTIL_SIGN_SET:
00517       chan->q = micro_set_sign(chan->q);
00518       break;
00519 
00520    case TGSI_UTIL_SIGN_TOGGLE:
00521       chan->q = micro_neg(chan->q);
00522       break;
00523 
00524    case TGSI_UTIL_SIGN_KEEP: /* value is left as fetched */
00525       break;
00526    }
00527 
00528    if (reg->SrcRegisterExtMod.Complement) {
00529       chan->q = si_fs(mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].q, chan->q); /* (the ONE temp) - value, applied after the sign mode */
00530    }
00531 }

static void fetch_src_file_channel ( const struct spu_exec_machine * mach,
const uint  file,
const uint  swizzle,
const union spu_exec_channel * index,
union spu_exec_channel * chan 
) [static]

Definition at line 341 of file spu_exec.c.

References spu_exec_machine::Addrs, ASSERT, spu_exec_machine::Consts, spu_exec_channel::f, spu_exec_channel::i, spu_exec_machine::ImmLimit, spu_exec_machine::Imms, spu_exec_machine::Inputs, spu_exec_machine::Outputs, spu_dcache_fetch_unaligned(), TEMP_0_C, TEMP_0_I, TEMP_1_C, TEMP_1_I, TGSI_EXTSWIZZLE_ONE, TGSI_EXTSWIZZLE_W, TGSI_EXTSWIZZLE_X, TGSI_EXTSWIZZLE_Y, TGSI_EXTSWIZZLE_Z, TGSI_EXTSWIZZLE_ZERO, TGSI_FILE_ADDRESS, TGSI_FILE_CONSTANT, TGSI_FILE_IMMEDIATE, TGSI_FILE_INPUT, TGSI_FILE_OUTPUT, TGSI_FILE_TEMPORARY, spu_exec_channel::u, and spu_exec_vector::xyzw.

00347 { /* Read component `swizzle` of register file `file` into *chan; element k of the result uses register index->i[k]. */
00348    switch( swizzle ) {
00349    case TGSI_EXTSWIZZLE_X:
00350    case TGSI_EXTSWIZZLE_Y:
00351    case TGSI_EXTSWIZZLE_Z:
00352    case TGSI_EXTSWIZZLE_W:
00353       switch( file ) {
00354       case TGSI_FILE_CONSTANT: {
00355          unsigned i;
00356 
00357          for (i = 0; i < 4; i++) { /* constants are fetched one element at a time */
00358             const float *ptr = mach->Consts[index->i[i]];
00359             float tmp[4];
00360 
00361             spu_dcache_fetch_unaligned((qword *) tmp, /* NOTE(review): tmp is a plain float[4] cast to qword *; confirm it is suitably aligned for the callee */
00362                                        (uintptr_t)(ptr + swizzle),
00363                                        sizeof(float));
00364 
00365             chan->f[i] = tmp[0]; /* only the first float of the fetched data is used */
00366          }
00367          break;
00368       }
00369 
00370       case TGSI_FILE_INPUT:
00371          chan->u[0] = mach->Inputs[index->i[0]].xyzw[swizzle].u[0]; /* element k comes from register index->i[k], so the four registers may differ */
00372          chan->u[1] = mach->Inputs[index->i[1]].xyzw[swizzle].u[1];
00373          chan->u[2] = mach->Inputs[index->i[2]].xyzw[swizzle].u[2];
00374          chan->u[3] = mach->Inputs[index->i[3]].xyzw[swizzle].u[3];
00375          break;
00376 
00377       case TGSI_FILE_TEMPORARY:
00378          chan->u[0] = mach->Temps[index->i[0]].xyzw[swizzle].u[0];
00379          chan->u[1] = mach->Temps[index->i[1]].xyzw[swizzle].u[1];
00380          chan->u[2] = mach->Temps[index->i[2]].xyzw[swizzle].u[2];
00381          chan->u[3] = mach->Temps[index->i[3]].xyzw[swizzle].u[3];
00382          break;
00383 
00384       case TGSI_FILE_IMMEDIATE:
00385          ASSERT( index->i[0] < (int) mach->ImmLimit ); /* only the upper bound is asserted; this is the only file that checks its index */
00386          ASSERT( index->i[1] < (int) mach->ImmLimit );
00387          ASSERT( index->i[2] < (int) mach->ImmLimit );
00388          ASSERT( index->i[3] < (int) mach->ImmLimit );
00389 
00390          chan->f[0] = mach->Imms[index->i[0]][swizzle]; /* immediates are copied as floats, not as raw words */
00391          chan->f[1] = mach->Imms[index->i[1]][swizzle];
00392          chan->f[2] = mach->Imms[index->i[2]][swizzle];
00393          chan->f[3] = mach->Imms[index->i[3]][swizzle];
00394          break;
00395 
00396       case TGSI_FILE_ADDRESS:
00397          chan->u[0] = mach->Addrs[index->i[0]].xyzw[swizzle].u[0];
00398          chan->u[1] = mach->Addrs[index->i[1]].xyzw[swizzle].u[1];
00399          chan->u[2] = mach->Addrs[index->i[2]].xyzw[swizzle].u[2];
00400          chan->u[3] = mach->Addrs[index->i[3]].xyzw[swizzle].u[3];
00401          break;
00402 
00403       case TGSI_FILE_OUTPUT:
00404          /* vertex/fragment output vars can be read too */
00405          chan->u[0] = mach->Outputs[index->i[0]].xyzw[swizzle].u[0];
00406          chan->u[1] = mach->Outputs[index->i[1]].xyzw[swizzle].u[1];
00407          chan->u[2] = mach->Outputs[index->i[2]].xyzw[swizzle].u[2];
00408          chan->u[3] = mach->Outputs[index->i[3]].xyzw[swizzle].u[3];
00409          break;
00410 
00411       default:
00412          ASSERT( 0 );
00413       }
00414       break;
00415 
00416    case TGSI_EXTSWIZZLE_ZERO: /* constant swizzles ignore file and index and copy a preset temp */
00417       *chan = mach->Temps[TEMP_0_I].xyzw[TEMP_0_C];
00418       break;
00419 
00420    case TGSI_EXTSWIZZLE_ONE:
00421       *chan = mach->Temps[TEMP_1_I].xyzw[TEMP_1_C];
00422       break;
00423 
00424    default:
00425       ASSERT( 0 );
00426    }
00427 }

static void fetch_texel ( struct spu_sampler * sampler,
const union spu_exec_channel * s,
const union spu_exec_channel * t,
const union spu_exec_channel * p,
float  lodbias,
union spu_exec_channel * r,
union spu_exec_channel * g,
union spu_exec_channel * b,
union spu_exec_channel * a 
) [static]

Definition at line 661 of file spu_exec.c.

References spu_exec_channel::f, spu_sampler::get_samples, and spu_exec_channel::q.

00670 { /* Sample through sampler->get_samples at coordinates s/t/p and return the result as one channel each in *r, *g, *b, *a. */
00671    qword rgba[4];
00672    qword out[4];
00673 
00674    sampler->get_samples(sampler, s->f, t ? t->f : NULL, p ? p->f : NULL, lodbias, /* t and p may be NULL (exec_tex passes NULL for 1D textures); do not form a member access through a null pointer */
00675                         (float (*)[4]) rgba);
00676 
00677    _transpose_matrix4x4((vec_float4 *) out, (vec_float4 *) rgba); /* transpose the 4x4 result so that out[0..3] are the R, G, B, A channels assigned below */
00678    r->q = out[0];
00679    g->q = out[1];
00680    b->q = out[2];
00681    a->q = out[3];
00682 }

static void linear_interpolation ( struct spu_exec_machine * mach,
unsigned  attrib,
unsigned  chan 
) [static]

Definition at line 796 of file spu_exec.c.

References spu_interp_coef::a0, spu_interp_coef::dadx, spu_interp_coef::dady, spu_exec_channel::f, spu_exec_machine::Inputs, spu_exec_machine::InterpCoefs, spu_exec_machine::QuadPos, and spu_exec_vector::xyzw.

00800 { /* Evaluate a0 + dadx*x + dady*y for one attribute channel and write it, plus its +x, +y and +x+y neighbours, to the four elements of Inputs[attrib]. */
00801    const float x = mach->QuadPos.xyzw[0].f[0]; /* position is taken from element 0 of QuadPos only */
00802    const float y = mach->QuadPos.xyzw[1].f[0];
00803    const float dadx = mach->InterpCoefs[attrib].dadx[chan];
00804    const float dady = mach->InterpCoefs[attrib].dady[chan];
00805    const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y; /* value at (x, y); shadows the coefficient of the same name */
00806    mach->Inputs[attrib].xyzw[chan].f[0] = a0;
00807    mach->Inputs[attrib].xyzw[chan].f[1] = a0 + dadx; /* one step in x */
00808    mach->Inputs[attrib].xyzw[chan].f[2] = a0 + dady; /* one step in y */
00809    mach->Inputs[attrib].xyzw[chan].f[3] = a0 + dadx + dady; /* one step in both x and y */
00810 }

static qword micro_abs ( qword  src  )  [static]

Definition at line 170 of file spu_exec.c.

00171 { /* Absolute value of each float element, done with integer shifts on the raw bits. */
00172    return si_rotmi(si_shli(src, 1), -1); /* shift left by 1, then right by 1 again: NOTE(review) relies on si_rotmi(x, -1) being a logical right shift so bit 31 comes back as 0 — confirm against the SPU ISA */
00173 }

static qword micro_ceil ( qword  src  )  [static]

Definition at line 176 of file spu_exec.c.

00177 { /* Thin wrapper: reinterpret src as vec_float4, apply _ceilf4, reinterpret the result as qword. */
00178    return (qword) _ceilf4((vec_float4) src);
00179 }

static qword micro_cos ( qword  src  )  [static]

Definition at line 182 of file spu_exec.c.

00183 { /* Thin wrapper: reinterpret src as vec_float4, apply _cosf4, reinterpret the result as qword. */
00184    return (qword) _cosf4((vec_float4) src);
00185 }

static qword micro_ddx ( qword  src  )  [static]

Definition at line 221 of file spu_exec.c.

References bl_shuf, and br_shuf.

00222 { /* X derivative: difference of two shuffled copies of src, selected by the br_shuf and bl_shuf patterns. */
00223    qword bottom_right = si_shufb(src, src, br_shuf); /* NOTE(review): br_shuf/bl_shuf are defined elsewhere; presumably each replicates one quad element into all four slots — confirm */
00224    qword bottom_left = si_shufb(src, src, bl_shuf);
00225 
00226    return si_fs(bottom_right, bottom_left); /* bottom_right - bottom_left */
00227 }

static qword micro_ddy ( qword  src  )  [static]

Definition at line 230 of file spu_exec.c.

References bl_shuf, and tl_shuf.

00231 { /* Y derivative: difference of two shuffled copies of src, selected by the tl_shuf and bl_shuf patterns. */
00232    qword top_left = si_shufb(src, src, tl_shuf); /* NOTE(review): tl_shuf/bl_shuf are defined elsewhere; presumably each replicates one quad element into all four slots — confirm */
00233    qword bottom_left = si_shufb(src, src, bl_shuf);
00234 
00235    return si_fs(top_left, bottom_left); /* top_left - bottom_left */
00236 }

static qword micro_div ( qword  src0,
qword  src1 
) [static]

Definition at line 239 of file spu_exec.c.

00240 { /* Thin wrapper: src0 / src1 via _divf4 on the operands reinterpreted as vec_float4. */
00241    return (qword) _divf4((vec_float4) src0, (vec_float4) src1);
00242 }

static qword micro_flr ( qword  src  )  [static]

Definition at line 245 of file spu_exec.c.

00246 { /* Thin wrapper: reinterpret src as vec_float4, apply _floorf4, reinterpret the result as qword. */
00247    return (qword) _floorf4((vec_float4) src);
00248 }

static qword micro_frc ( qword  src  )  [static]

Definition at line 251 of file spu_exec.c.

00252 { /* Fractional part of each element. */
00253    return si_fs(src, (qword) _floorf4((vec_float4) src)); /* src - floor(src) */
00254 }

static qword micro_ge ( qword  src0,
qword  src1 
) [static]

Definition at line 257 of file spu_exec.c.

00258 {
00259    return si_or(si_fceq(src0, src1), si_fcgt(src0, src1));  /* (src0 == src1) OR (src0 > src1): per-word mask for src0 >= src1 */
00260 }

static qword micro_ishr ( qword  src0,
qword  src1 
) [static]

Definition at line 317 of file spu_exec.c.

00318 {
00319    return si_rotma(src0, si_sfi(src1, 0));  /* si_sfi(src1, 0) yields 0 - src1; rotma with a negated count is an arithmetic right shift of src0 */
00320 }

static qword micro_lg2 ( qword  src  )  [static]

Definition at line 263 of file spu_exec.c.

00264 {
00265    return (qword) _log2f4((vec_float4) src);  /* reinterpret as vec_float4 and defer to _log2f4 (presumably base-2 logarithm) */
00266 }

static qword micro_lt ( qword  src0,
qword  src1 
) [static]

Definition at line 269 of file spu_exec.c.

00270 {
00271    const qword tmp = si_or(si_fceq(src0, src1), si_fcgt(src0, src1));  /* per-word mask for src0 >= src1 */
00272    /* Invert every bit; xori sign-extends its immediate, so -1 gives 0xFFFFFFFF (0xff would flip only the low byte). */
00273    return si_xori(tmp, -1);
00274 }

static qword micro_max ( qword  src0,
qword  src1 
) [static]

Definition at line 277 of file spu_exec.c.

00278 {
00279    return si_selb(src1, src0, si_fcgt(src0, src1));  /* where src0 > src1 take src0, otherwise src1 */
00280 }

static qword micro_min ( qword  src0,
qword  src1 
) [static]

Definition at line 283 of file spu_exec.c.

00284 {
00285    return si_selb(src0, src1, si_fcgt(src0, src1));  /* where src0 > src1 take src1, otherwise src0 */
00286 }

static qword micro_neg ( qword  src  )  [static]

Definition at line 289 of file spu_exec.c.

00290 {
00291    return si_xor(src, (qword) spu_splats(0x80000000));  /* flip bit 31 (the float sign bit) of every word */
00292 }

static qword micro_pow ( qword  src0,
qword  src1 
) [static]

Definition at line 301 of file spu_exec.c.

00302 {
00303    return (qword) _powf4((vec_float4) src0, (vec_float4) src1);  /* _powf4 on the operands viewed as vec_float4 (presumably src0 raised to src1) */
00304 }

static qword micro_rnd ( qword  src  )  [static]

Definition at line 307 of file spu_exec.c.

00308 {
00309    const qword half = (qword) spu_splats(0.5f);  /* 0.5f replicated to every lane */
00310    /* Rounding is implemented as floor(src + 0.5). */
00311    /* May be able to use _roundf4.  There may be some difference, though.
00312     */
00313    return (qword) _floorf4((vec_float4) si_fa(src, half));
00314 }

static qword micro_set_sign ( qword  src  )  [static]

Definition at line 295 of file spu_exec.c.

00296 {
00297    return si_or(src, (qword) spu_splats(0x80000000));  /* force bit 31 (the float sign bit) of every word to 1 */
00298 }

static qword micro_sin ( qword  src  )  [static]

Definition at line 329 of file spu_exec.c.

00330 {
00331    return (qword) _sinf4((vec_float4) src);  /* reinterpret as vec_float4 and defer to _sinf4 (presumably the SIMD math library sine) */
00332 }

static qword micro_sqrt ( qword  src  )  [static]

Definition at line 335 of file spu_exec.c.

00336 {
00337    return (qword) _sqrtf4((vec_float4) src);  /* reinterpret as vec_float4 and defer to _sqrtf4 (presumably square root) */
00338 }

static qword micro_trunc ( qword  src  )  [static]

Definition at line 323 of file spu_exec.c.

00324 {
00325    return (qword) _truncf4((vec_float4) src);  /* reinterpret as vec_float4 and defer to _truncf4 (presumably truncation toward zero) */
00326 }

static void perspective_interpolation ( struct spu_exec_machine *  mach,
unsigned  attrib,
unsigned  chan 
) [static]

Definition at line 813 of file spu_exec.c.

References spu_interp_coef::a0, spu_interp_coef::dadx, spu_interp_coef::dady, spu_exec_channel::f, spu_exec_machine::Inputs, spu_exec_machine::InterpCoefs, spu_exec_machine::QuadPos, and spu_exec_vector::xyzw.

00817 {
00818    const float x = mach->QuadPos.xyzw[0].f[0];  /* x taken from lane 0 of the quad position */
00819    const float y = mach->QuadPos.xyzw[1].f[0];  /* y taken from lane 0 of the quad position */
00820    const float dadx = mach->InterpCoefs[attrib].dadx[chan];  /* attribute change per unit step in x */
00821    const float dady = mach->InterpCoefs[attrib].dady[chan];  /* attribute change per unit step in y */
00822    const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y;  /* plane equation evaluated at (x, y) */
00823    const float *w = mach->QuadPos.xyzw[3].f;  /* per-lane W values of the quad */
00824    /* divide by W here */
00825    mach->Inputs[attrib].xyzw[chan].f[0] = a0 / w[0];                  /* lane 0: (x, y) */
00826    mach->Inputs[attrib].xyzw[chan].f[1] = (a0 + dadx) / w[1];         /* lane 1: one step in x */
00827    mach->Inputs[attrib].xyzw[chan].f[2] = (a0 + dady) / w[2];         /* lane 2: one step in y */
00828    mach->Inputs[attrib].xyzw[chan].f[3] = (a0 + dadx + dady) / w[3];  /* lane 3: one step in x and y */
00829 }

void spu_exec_machine_init ( struct spu_exec_machine *  mach,
uint  numSamplers,
struct spu_sampler *  samplers,
unsigned  processor 
)

Initialize machine state by expanding tokens to full instructions, allocating temporary storage, setting up constants, etc.

After this, we can call spu_exec_machine_run() many times.

Definition at line 143 of file spu_exec.c.

References spu_exec_machine::Addrs, spu_exec_machine::Processor, spu_exec_machine::Samplers, TEMP_0_C, TEMP_0_I, TEMP_128_C, TEMP_128_I, TEMP_1_C, TEMP_1_I, TEMP_2_C, TEMP_2_I, TEMP_7F_C, TEMP_7F_I, TEMP_80_C, TEMP_80_I, TEMP_FF_C, TEMP_FF_I, TEMP_M128_C, TEMP_M128_I, and TGSI_EXEC_NUM_TEMPS.

00147 {
00148    const qword zero = si_il(0);        /* every word 0x00000000 */
00149    const qword not_zero = si_il(~0);   /* every word 0xFFFFFFFF (immediate -1, sign-extended) */
00150    /* numSamplers is accepted but not used by this implementation. */
00151    (void) numSamplers;
00152    mach->Samplers = samplers;
00153    mach->Processor = processor;
00154    mach->Addrs = &mach->Temps[TGSI_EXEC_NUM_TEMPS];  /* address registers start right after the last temporary */
00155 
00156    /* Setup constants. */
00157    mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].q = zero;
00158    mach->Temps[TEMP_FF_I].xyzw[TEMP_FF_C].q = not_zero;
00159    mach->Temps[TEMP_7F_I].xyzw[TEMP_7F_C].q = si_rotmi(not_zero, -1);  /* logical right shift by 1 -> 0x7FFFFFFF; shli with -1 shifted everything out and produced 0 */
00160    mach->Temps[TEMP_80_I].xyzw[TEMP_80_C].q = si_shli(not_zero, 31);   /* 0x80000000 */
00161    /* Floating-point constants, replicated to all four lanes. */
00162    mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].q = (qword) spu_splats(1.0f);
00163    mach->Temps[TEMP_2_I].xyzw[TEMP_2_C].q = (qword) spu_splats(2.0f);
00164    mach->Temps[TEMP_128_I].xyzw[TEMP_128_C].q = (qword) spu_splats(128.0f);
00165    mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C].q = (qword) spu_splats(-128.0f);
00166 }

uint spu_exec_machine_run ( struct spu_exec_machine *  mach  ) 

Run TGSI interpreter.

Returns:
bitmask of "alive" quad components

Definition at line 1866 of file spu_exec.c.

References ALIGN16_ATTRIB, ASSERT, spu_exec_machine::CallStackTop, spu_exec_machine::CondMask, spu_exec_machine::CondStackTop, spu_exec_machine::ContMask, spu_exec_machine::ContStackTop, spu_exec_machine::Declarations, exec_declaration(), exec_instruction(), spu_exec_machine::ExecMask, spu_exec_channel::f, spu_exec_machine::FuncMask, spu_exec_channel::i, spu_exec_machine::Instructions, spu_exec_machine::LoopMask, spu_exec_machine::LoopStackTop, spu_exec_machine::NumDeclarations, spu_exec_machine::Outputs, spu_exec_machine::Primitives, spu_exec_machine::Processor, ROUNDUP16, spu_dcache_fetch_unaligned(), TEMP_KILMASK_C, TEMP_KILMASK_I, TEMP_OUTPUT_C, TEMP_OUTPUT_I, TEMP_PRIMITIVE_C, TEMP_PRIMITIVE_I, TGSI_PROCESSOR_FRAGMENT, TGSI_PROCESSOR_GEOMETRY, and spu_exec_vector::xyzw.

01867 {
01868    uint i;
01869    int pc = 0;
01870 
01871    mach->CondMask = 0xf;
01872    mach->LoopMask = 0xf;
01873    mach->ContMask = 0xf;
01874    mach->FuncMask = 0xf;
01875    mach->ExecMask = 0xf;
01876 
01877    mach->CondStackTop = 0; /* temporarily subvert this ASSERTion */
01878    ASSERT(mach->CondStackTop == 0);
01879    ASSERT(mach->LoopStackTop == 0);
01880    ASSERT(mach->ContStackTop == 0);
01881    ASSERT(mach->CallStackTop == 0);
01882 
01883    mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] = 0;
01884    mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] = 0;
01885 
01886    if( mach->Processor == TGSI_PROCESSOR_GEOMETRY ) {
01887       mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0] = 0;
01888       mach->Primitives[0] = 0;
01889    }
01890 
01891 
01892    /* execute declarations (interpolants) */
01893    if( mach->Processor == TGSI_PROCESSOR_FRAGMENT ) {
01894       for (i = 0; i < mach->NumDeclarations; i++) {
01895          union {
01896             struct tgsi_full_declaration decl;
01897             qword buffer[ROUNDUP16(sizeof(struct tgsi_full_declaration)) / 16];
01898          } d ALIGN16_ATTRIB;
01899          unsigned ea = (unsigned) (mach->Declarations + pc);
01900 
01901          spu_dcache_fetch_unaligned(d.buffer, ea, sizeof(d.decl));
01902 
01903          exec_declaration( mach, &d.decl );
01904       }
01905    }
01906 
01907    /* execute instructions, until pc is set to -1 */
01908    while (pc != -1) {
01909       union {
01910          struct tgsi_full_instruction inst;
01911          qword buffer[ROUNDUP16(sizeof(struct tgsi_full_instruction)) / 16];
01912       } i ALIGN16_ATTRIB;
01913       unsigned ea = (unsigned) (mach->Instructions + pc);
01914 
01915       spu_dcache_fetch_unaligned(i.buffer, ea, sizeof(i.inst));
01916       exec_instruction( mach, & i.inst, &pc );
01917    }
01918 
01919 #if 0
01920    /* we scale from floats in [0,1] to Zbuffer ints in sp_quad_depth_test.c */
01921    if (mach->Processor == TGSI_PROCESSOR_FRAGMENT) {
01922       /*
01923        * Scale back depth component.
01924        */
01925       for (i = 0; i < 4; i++)
01926          mach->Outputs[0].xyzw[2].f[i] *= ctx->DrawBuffer->_DepthMaxF;
01927    }
01928 #endif
01929 
01930    return ~mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0];
01931 }

static void store_dest ( struct spu_exec_machine *  mach,
const union spu_exec_channel *  chan,
const struct tgsi_full_dst_register *  reg,
const struct tgsi_full_instruction *  inst,
uint  chan_index 
) [static]

Definition at line 534 of file spu_exec.c.

References spu_exec_machine::Addrs, ASSERT, tgsi_full_dst_register::DstRegister, spu_exec_machine::ExecMask, tgsi_dst_register::File, spu_exec_channel::i, tgsi_dst_register::Index, tgsi_full_instruction::Instruction, micro_max(), micro_min(), spu_exec_machine::Outputs, spu_exec_channel::q, tgsi_instruction::Saturate, TEMP_0_C, TEMP_0_I, TEMP_1_C, TEMP_1_I, TEMP_OUTPUT_C, TEMP_OUTPUT_I, TGSI_FILE_ADDRESS, TGSI_FILE_NULL, TGSI_FILE_OUTPUT, TGSI_FILE_TEMPORARY, TGSI_SAT_MINUS_PLUS_ONE, TGSI_SAT_NONE, TGSI_SAT_ZERO_ONE, and spu_exec_vector::xyzw.

00540 {
00541    union spu_exec_channel *dst;
00542    /* Step 1: locate the destination channel from the register file and index. */
00543    switch( reg->DstRegister.File ) {
00544    case TGSI_FILE_NULL:
00545       return;   /* null destination: nothing is written */
00546 
00547    case TGSI_FILE_OUTPUT:
00548       dst = &mach->Outputs[mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0]
00549                            + reg->DstRegister.Index].xyzw[chan_index];   /* index is offset by the value held in the TEMP_OUTPUT slot */
00550       break;
00551 
00552    case TGSI_FILE_TEMPORARY:
00553       dst = &mach->Temps[reg->DstRegister.Index].xyzw[chan_index];
00554       break;
00555 
00556    case TGSI_FILE_ADDRESS:
00557       dst = &mach->Addrs[reg->DstRegister.Index].xyzw[chan_index];
00558       break;
00559 
00560    default:
00561       ASSERT( 0 );   /* any other register file is not handled here */
00562       return;
00563    }
00564    /* Step 2: write the value, applying the instruction's saturate mode. */
00565    switch (inst->Instruction.Saturate)
00566    {
00567    case TGSI_SAT_NONE:
00568       if (mach->ExecMask & 0x1)   /* each lane is copied only when its ExecMask bit is set */
00569          dst->i[0] = chan->i[0];
00570       if (mach->ExecMask & 0x2)
00571          dst->i[1] = chan->i[1];
00572       if (mach->ExecMask & 0x4)
00573          dst->i[2] = chan->i[2];
00574       if (mach->ExecMask & 0x8)
00575          dst->i[3] = chan->i[3];
00576       break;
00577 
00578    case TGSI_SAT_ZERO_ONE:
00579       /* XXX need to obey ExecMask here */
00580       dst->q = micro_max(chan->q, mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].q);   /* lower clamp against the TEMP_0 constant; writes all four lanes */
00581       dst->q = micro_min(dst->q, mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].q);    /* upper clamp against the TEMP_1 constant; writes all four lanes */
00582       break;
00583 
00584    case TGSI_SAT_MINUS_PLUS_ONE:
00585       ASSERT( 0 );   /* this saturate mode is not implemented */
00586       break;
00587 
00588    default:
00589       ASSERT( 0 );
00590    }
00591 }


Variable Documentation

const qword bl_shuf [static]

Initial value:

 {  /* shufb indices address bytes, so the lane index (2) is scaled by 4: selects bytes 8..11 for every output word */
    2 * 4  + 0,  2 * 4  + 1,
    2 * 4  + 2,  2 * 4  + 3,
    2 * 4  + 0,  2 * 4  + 1,
    2 * 4  + 2,  2 * 4  + 3,
    2 * 4  + 0,  2 * 4  + 1,
    2 * 4  + 2,  2 * 4  + 3,
    2 * 4  + 0,  2 * 4  + 1,
    2 * 4  + 2,  2 * 4  + 3,
}

Definition at line 198 of file spu_exec.c.

const qword br_shuf [static]

Initial value:

 {  /* shufb indices address bytes, so the lane index (3) is scaled by 4: selects bytes 12..15 for every output word */
    3 * 4  + 0,  3 * 4  + 1,
    3 * 4  + 2,  3 * 4  + 3,
    3 * 4  + 0,  3 * 4  + 1,
    3 * 4  + 2,  3 * 4  + 3,
    3 * 4  + 0,  3 * 4  + 1,
    3 * 4  + 2,  3 * 4  + 3,
    3 * 4  + 0,  3 * 4  + 1,
    3 * 4  + 2,  3 * 4  + 3,
}

Definition at line 187 of file spu_exec.c.

const qword tl_shuf [static]

Initial value:

 {  /* byte indices 0..3 repeated four times: every output word is taken from the first word of the source */
    0  + 0,  0  + 1,
    0  + 2,  0  + 3,
    0  + 0,  0  + 1,
    0  + 2,  0  + 3,
    0  + 0,  0  + 1,
    0  + 2,  0  + 3,
    0  + 0,  0  + 1,
    0  + 2,  0  + 3,
}

Definition at line 209 of file spu_exec.c.


Generated on Tue Sep 29 06:25:32 2009 for Gallium3D by  doxygen 1.5.4