Go to the source code of this file.
Defines | |
#define | TILE_TOP_LEFT 0 |
TGSI interpreter/executor. | |
#define | TILE_TOP_RIGHT 1 |
#define | TILE_BOTTOM_LEFT 2 |
#define | TILE_BOTTOM_RIGHT 3 |
#define | TEMP_0_I TGSI_EXEC_TEMP_00000000_I |
#define | TEMP_0_C TGSI_EXEC_TEMP_00000000_C |
#define | TEMP_7F_I TGSI_EXEC_TEMP_7FFFFFFF_I |
#define | TEMP_7F_C TGSI_EXEC_TEMP_7FFFFFFF_C |
#define | TEMP_80_I TGSI_EXEC_TEMP_80000000_I |
#define | TEMP_80_C TGSI_EXEC_TEMP_80000000_C |
#define | TEMP_FF_I TGSI_EXEC_TEMP_FFFFFFFF_I |
#define | TEMP_FF_C TGSI_EXEC_TEMP_FFFFFFFF_C |
#define | TEMP_1_I TGSI_EXEC_TEMP_ONE_I |
#define | TEMP_1_C TGSI_EXEC_TEMP_ONE_C |
#define | TEMP_2_I TGSI_EXEC_TEMP_TWO_I |
#define | TEMP_2_C TGSI_EXEC_TEMP_TWO_C |
#define | TEMP_128_I TGSI_EXEC_TEMP_128_I |
#define | TEMP_128_C TGSI_EXEC_TEMP_128_C |
#define | TEMP_M128_I TGSI_EXEC_TEMP_MINUS_128_I |
#define | TEMP_M128_C TGSI_EXEC_TEMP_MINUS_128_C |
#define | TEMP_KILMASK_I TGSI_EXEC_TEMP_KILMASK_I |
#define | TEMP_KILMASK_C TGSI_EXEC_TEMP_KILMASK_C |
#define | TEMP_OUTPUT_I TGSI_EXEC_TEMP_OUTPUT_I |
#define | TEMP_OUTPUT_C TGSI_EXEC_TEMP_OUTPUT_C |
#define | TEMP_PRIMITIVE_I TGSI_EXEC_TEMP_PRIMITIVE_I |
#define | TEMP_PRIMITIVE_C TGSI_EXEC_TEMP_PRIMITIVE_C |
#define | TEMP_R0 TGSI_EXEC_TEMP_R0 |
#define | FOR_EACH_CHANNEL(CHAN) for (CHAN = 0; CHAN < 4; CHAN++) |
#define | IS_CHANNEL_ENABLED(INST, CHAN) ((INST).FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN))) |
#define | IS_CHANNEL_ENABLED2(INST, CHAN) ((INST).FullDstRegisters[1].DstRegister.WriteMask & (1 << (CHAN))) |
#define | FOR_EACH_ENABLED_CHANNEL(INST, CHAN) |
#define | FOR_EACH_ENABLED_CHANNEL2(INST, CHAN) |
#define | UPDATE_EXEC_MASK(MACH) MACH->ExecMask = MACH->CondMask & MACH->LoopMask & MACH->ContMask & MACH->FuncMask |
The execution mask is the bitwise AND of the conditional mask, the loop mask, the continue mask and the function-call mask. | |
#define | CHAN_X 0 |
#define | CHAN_Y 1 |
#define | CHAN_Z 2 |
#define | CHAN_W 3 |
#define | FETCH(VAL, INDEX, CHAN) fetch_source (mach, VAL, &inst->FullSrcRegisters[INDEX], CHAN) |
#define | STORE(VAL, INDEX, CHAN) store_dest (mach, VAL, &inst->FullDstRegisters[INDEX], inst, CHAN ) |
Typedefs | |
typedef void(* | interpolation_func )(struct spu_exec_machine *mach, unsigned attrib, unsigned chan) |
Functions | |
void | spu_exec_machine_init (struct spu_exec_machine *mach, uint numSamplers, struct spu_sampler *samplers, unsigned processor) |
Initialize machine state by expanding tokens to full instructions, allocating temporary storage, setting up constants, etc. | |
static qword | micro_abs (qword src) |
static qword | micro_ceil (qword src) |
static qword | micro_cos (qword src) |
static qword | micro_ddx (qword src) |
static qword | micro_ddy (qword src) |
static qword | micro_div (qword src0, qword src1) |
static qword | micro_flr (qword src) |
static qword | micro_frc (qword src) |
static qword | micro_ge (qword src0, qword src1) |
static qword | micro_lg2 (qword src) |
static qword | micro_lt (qword src0, qword src1) |
static qword | micro_max (qword src0, qword src1) |
static qword | micro_min (qword src0, qword src1) |
static qword | micro_neg (qword src) |
static qword | micro_set_sign (qword src) |
static qword | micro_pow (qword src0, qword src1) |
static qword | micro_rnd (qword src) |
static qword | micro_ishr (qword src0, qword src1) |
static qword | micro_trunc (qword src) |
static qword | micro_sin (qword src) |
static qword | micro_sqrt (qword src) |
static void | fetch_src_file_channel (const struct spu_exec_machine *mach, const uint file, const uint swizzle, const union spu_exec_channel *index, union spu_exec_channel *chan) |
static void | fetch_source (const struct spu_exec_machine *mach, union spu_exec_channel *chan, const struct tgsi_full_src_register *reg, const uint chan_index) |
static void | store_dest (struct spu_exec_machine *mach, const union spu_exec_channel *chan, const struct tgsi_full_dst_register *reg, const struct tgsi_full_instruction *inst, uint chan_index) |
static void | exec_kil (struct spu_exec_machine *mach, const struct tgsi_full_instruction *inst) |
Execute ARB-style KIL which is predicated by a src register. | |
static void | exec_kilp (struct tgsi_exec_machine *mach, const struct tgsi_full_instruction *inst) |
Execute NVIDIA-style KIL which is predicated by a condition code. | |
static void | fetch_texel (struct spu_sampler *sampler, const union spu_exec_channel *s, const union spu_exec_channel *t, const union spu_exec_channel *p, float lodbias, union spu_exec_channel *r, union spu_exec_channel *g, union spu_exec_channel *b, union spu_exec_channel *a) |
static void | exec_tex (struct spu_exec_machine *mach, const struct tgsi_full_instruction *inst, boolean biasLod, boolean projected) |
static void | constant_interpolation (struct spu_exec_machine *mach, unsigned attrib, unsigned chan) |
static void | linear_interpolation (struct spu_exec_machine *mach, unsigned attrib, unsigned chan) |
static void | perspective_interpolation (struct spu_exec_machine *mach, unsigned attrib, unsigned chan) |
static void | exec_declaration (struct spu_exec_machine *mach, const struct tgsi_full_declaration *decl) |
static void | exec_instruction (struct spu_exec_machine *mach, const struct tgsi_full_instruction *inst, int *pc) |
uint | spu_exec_machine_run (struct spu_exec_machine *mach) |
Run TGSI interpreter. | |
Variables | |
static const qword | br_shuf |
static const qword | bl_shuf |
static const qword | tl_shuf |
#define CHAN_W 3 |
Definition at line 133 of file spu_exec.c.
#define CHAN_X 0 |
Definition at line 130 of file spu_exec.c.
#define CHAN_Y 1 |
Definition at line 131 of file spu_exec.c.
#define CHAN_Z 2 |
Definition at line 132 of file spu_exec.c.
#define FETCH | ( | VAL, | |||
INDEX, | |||||
CHAN | ) | fetch_source (mach, VAL, &inst->FullSrcRegisters[INDEX], CHAN) |
Definition at line 593 of file spu_exec.c.
#define FOR_EACH_CHANNEL | ( | CHAN | ) | for (CHAN = 0; CHAN < 4; CHAN++) |
Definition at line 107 of file spu_exec.c.
#define FOR_EACH_ENABLED_CHANNEL | ( | INST, | |||
CHAN | ) |
Value:
FOR_EACH_CHANNEL( CHAN )\ if (IS_CHANNEL_ENABLED( INST, CHAN ))
Definition at line 116 of file spu_exec.c.
#define FOR_EACH_ENABLED_CHANNEL2 | ( | INST, | |||
CHAN | ) |
Value:
FOR_EACH_CHANNEL( CHAN )\ if (IS_CHANNEL_ENABLED2( INST, CHAN ))
Definition at line 120 of file spu_exec.c.
#define IS_CHANNEL_ENABLED | ( | INST, | |||
CHAN | ) | ((INST).FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN))) |
Definition at line 110 of file spu_exec.c.
#define IS_CHANNEL_ENABLED2 | ( | INST, | |||
CHAN | ) | ((INST).FullDstRegisters[1].DstRegister.WriteMask & (1 << (CHAN))) |
Definition at line 113 of file spu_exec.c.
#define STORE | ( | VAL, | |||
INDEX, | |||||
CHAN | ) | store_dest (mach, VAL, &inst->FullDstRegisters[INDEX], inst, CHAN ) |
Definition at line 596 of file spu_exec.c.
#define TEMP_0_C TGSI_EXEC_TEMP_00000000_C |
Definition at line 84 of file spu_exec.c.
#define TEMP_0_I TGSI_EXEC_TEMP_00000000_I |
Definition at line 83 of file spu_exec.c.
#define TEMP_128_C TGSI_EXEC_TEMP_128_C |
Definition at line 96 of file spu_exec.c.
#define TEMP_128_I TGSI_EXEC_TEMP_128_I |
Definition at line 95 of file spu_exec.c.
#define TEMP_1_C TGSI_EXEC_TEMP_ONE_C |
Definition at line 92 of file spu_exec.c.
#define TEMP_1_I TGSI_EXEC_TEMP_ONE_I |
Definition at line 91 of file spu_exec.c.
#define TEMP_2_C TGSI_EXEC_TEMP_TWO_C |
Definition at line 94 of file spu_exec.c.
#define TEMP_2_I TGSI_EXEC_TEMP_TWO_I |
Definition at line 93 of file spu_exec.c.
#define TEMP_7F_C TGSI_EXEC_TEMP_7FFFFFFF_C |
Definition at line 86 of file spu_exec.c.
#define TEMP_7F_I TGSI_EXEC_TEMP_7FFFFFFF_I |
Definition at line 85 of file spu_exec.c.
#define TEMP_80_C TGSI_EXEC_TEMP_80000000_C |
Definition at line 88 of file spu_exec.c.
#define TEMP_80_I TGSI_EXEC_TEMP_80000000_I |
Definition at line 87 of file spu_exec.c.
#define TEMP_FF_C TGSI_EXEC_TEMP_FFFFFFFF_C |
Definition at line 90 of file spu_exec.c.
#define TEMP_FF_I TGSI_EXEC_TEMP_FFFFFFFF_I |
Definition at line 89 of file spu_exec.c.
#define TEMP_KILMASK_C TGSI_EXEC_TEMP_KILMASK_C |
Definition at line 100 of file spu_exec.c.
#define TEMP_KILMASK_I TGSI_EXEC_TEMP_KILMASK_I |
Definition at line 99 of file spu_exec.c.
#define TEMP_M128_C TGSI_EXEC_TEMP_MINUS_128_C |
Definition at line 98 of file spu_exec.c.
#define TEMP_M128_I TGSI_EXEC_TEMP_MINUS_128_I |
Definition at line 97 of file spu_exec.c.
#define TEMP_OUTPUT_C TGSI_EXEC_TEMP_OUTPUT_C |
Definition at line 102 of file spu_exec.c.
#define TEMP_OUTPUT_I TGSI_EXEC_TEMP_OUTPUT_I |
Definition at line 101 of file spu_exec.c.
#define TEMP_PRIMITIVE_C TGSI_EXEC_TEMP_PRIMITIVE_C |
Definition at line 104 of file spu_exec.c.
#define TEMP_PRIMITIVE_I TGSI_EXEC_TEMP_PRIMITIVE_I |
Definition at line 103 of file spu_exec.c.
#define TEMP_R0 TGSI_EXEC_TEMP_R0 |
Definition at line 105 of file spu_exec.c.
#define TILE_BOTTOM_LEFT 2 |
Definition at line 77 of file spu_exec.c.
#define TILE_BOTTOM_RIGHT 3 |
Definition at line 78 of file spu_exec.c.
#define TILE_TOP_LEFT 0 |
TGSI interpreter/executor.
Flow control information:
Since we operate on 'quads' (4 pixels or 4 vertices in parallel) flow control statements (IF/ELSE/ENDIF, LOOP/ENDLOOP) require special care since a condition may be true for some quad components but false for other components.
We basically execute all statements (even if they're in the part of an IF/ELSE clause that's "not taken") and use a special mask to control writing to destination registers. This is the ExecMask. See store_dest().
The ExecMask is computed from four other masks (CondMask, LoopMask, ContMask and FuncMask) which are controlled by the flow control instructions (namely: IF/ELSE/ENDIF, LOOP/ENDLOOP, CONT and CAL/RET).
Authors: Michal Krol, Brian Paul
Definition at line 75 of file spu_exec.c.
#define TILE_TOP_RIGHT 1 |
Definition at line 76 of file spu_exec.c.
#define UPDATE_EXEC_MASK | ( | MACH | ) | MACH->ExecMask = MACH->CondMask & MACH->LoopMask & MACH->ContMask & MACH->FuncMask |
The execution mask is the bitwise AND of the conditional mask, the loop mask, the continue mask and the function-call mask.
Definition at line 126 of file spu_exec.c.
typedef void(* interpolation_func)(struct spu_exec_machine *mach, unsigned attrib, unsigned chan) |
Definition at line 832 of file spu_exec.c.
static void constant_interpolation | ( | struct spu_exec_machine * | mach, | |
unsigned | attrib, | |||
unsigned | chan | |||
) | [static] |
Definition at line 783 of file spu_exec.c.
References spu_interp_coef::a0, spu_exec_channel::f, spu_exec_channel::i, spu_exec_machine::Inputs, spu_exec_machine::InterpCoefs, QUAD_SIZE, and spu_exec_vector::xyzw.
00787 { 00788 unsigned i; 00789 00790 for( i = 0; i < QUAD_SIZE; i++ ) { 00791 mach->Inputs[attrib].xyzw[chan].f[i] = mach->InterpCoefs[attrib].a0[chan]; 00792 } 00793 }
static void exec_declaration | ( | struct spu_exec_machine * | mach, | |
const struct tgsi_full_declaration * | decl | |||
) | [static] |
Definition at line 838 of file spu_exec.c.
References ASSERT, constant_interpolation(), tgsi_full_declaration::Declaration, tgsi_full_declaration::DeclarationRange, tgsi_declaration::File, tgsi_declaration_range::First, interp(), tgsi_declaration::Interpolate, tgsi_declaration_range::Last, linear_interpolation(), NUM_CHANNELS, perspective_interpolation(), spu_exec_machine::Processor, TGSI_FILE_INPUT, TGSI_INTERPOLATE_CONSTANT, TGSI_INTERPOLATE_LINEAR, TGSI_INTERPOLATE_PERSPECTIVE, TGSI_PROCESSOR_FRAGMENT, TGSI_WRITEMASK_XYZW, and tgsi_declaration::UsageMask.
00840 { 00841 if( mach->Processor == TGSI_PROCESSOR_FRAGMENT ) { 00842 if( decl->Declaration.File == TGSI_FILE_INPUT ) { 00843 unsigned first, last, mask; 00844 interpolation_func interp; 00845 00846 first = decl->DeclarationRange.First; 00847 last = decl->DeclarationRange.Last; 00848 mask = decl->Declaration.UsageMask; 00849 00850 switch( decl->Declaration.Interpolate ) { 00851 case TGSI_INTERPOLATE_CONSTANT: 00852 interp = constant_interpolation; 00853 break; 00854 00855 case TGSI_INTERPOLATE_LINEAR: 00856 interp = linear_interpolation; 00857 break; 00858 00859 case TGSI_INTERPOLATE_PERSPECTIVE: 00860 interp = perspective_interpolation; 00861 break; 00862 00863 default: 00864 ASSERT( 0 ); 00865 } 00866 00867 if( mask == TGSI_WRITEMASK_XYZW ) { 00868 unsigned i, j; 00869 00870 for( i = first; i <= last; i++ ) { 00871 for( j = 0; j < NUM_CHANNELS; j++ ) { 00872 interp( mach, i, j ); 00873 } 00874 } 00875 } 00876 else { 00877 unsigned i, j; 00878 00879 for( j = 0; j < NUM_CHANNELS; j++ ) { 00880 if( mask & (1 << j) ) { 00881 for( i = first; i <= last; i++ ) { 00882 interp( mach, i, j ); 00883 } 00884 } 00885 } 00886 } 00887 } 00888 } 00889 }
static void exec_instruction | ( | struct spu_exec_machine * | mach, | |
const struct tgsi_full_instruction * | inst, | |||
int * | pc | |||
) | [static] |
Definition at line 892 of file spu_exec.c.
References ASSERT, spu_exec_machine::CallStack, spu_exec_machine::CallStackTop, CHAN_W, CHAN_X, CHAN_Y, CHAN_Z, spu_exec_machine::CondMask, spu_exec_machine::CondStack, spu_exec_machine::CondStackTop, spu_exec_machine::ContMask, spu_exec_machine::ContStack, spu_exec_machine::ContStackTop, exec_kil(), exec_kilp(), exec_tex(), spu_exec_machine::ExecMask, FALSE, FETCH, FOR_EACH_ENABLED_CHANNEL, spu_exec_machine::FuncMask, spu_exec_machine::FuncStack, spu_exec_machine::FuncStackTop, tgsi_full_instruction::Instruction, tgsi_full_instruction::InstructionExtLabel, IS_CHANNEL_ENABLED, tgsi_instruction_ext_label::Label, spu_exec_machine::LoopMask, spu_exec_machine::LoopStack, spu_exec_machine::LoopStackTop, micro_abs(), micro_ceil(), micro_cos(), micro_ddx(), micro_ddy(), micro_div(), micro_flr(), micro_frc(), micro_ge(), micro_ishr(), micro_lg2(), micro_lt(), micro_max(), micro_min(), micro_pow(), micro_rnd(), micro_sin(), micro_sqrt(), micro_trunc(), tgsi_instruction::Opcode, spu_exec_machine::Primitives, spu_exec_channel::q, STORE, TEMP_0_C, TEMP_0_I, TEMP_128_C, TEMP_128_I, TEMP_1_C, TEMP_1_I, TEMP_2_C, TEMP_2_I, TEMP_M128_C, TEMP_M128_I, TEMP_OUTPUT_C, TEMP_OUTPUT_I, TEMP_PRIMITIVE_C, TEMP_PRIMITIVE_I, TGSI_EXEC_MAX_CALL_NESTING, TGSI_EXEC_MAX_COND_NESTING, TGSI_EXEC_MAX_LOOP_NESTING, TGSI_OPCODE_ABS, TGSI_OPCODE_ADD, TGSI_OPCODE_AND, TGSI_OPCODE_ARA, TGSI_OPCODE_ARL, TGSI_OPCODE_ARR, TGSI_OPCODE_BGNLOOP2, TGSI_OPCODE_BGNSUB, TGSI_OPCODE_BRA, TGSI_OPCODE_BRK, TGSI_OPCODE_CAL, TGSI_OPCODE_CEIL, TGSI_OPCODE_CLAMP, TGSI_OPCODE_CMP, TGSI_OPCODE_CND, TGSI_OPCODE_CND0, TGSI_OPCODE_CONT, TGSI_OPCODE_COS, TGSI_OPCODE_CROSSPRODUCT, TGSI_OPCODE_DDX, TGSI_OPCODE_DDY, TGSI_OPCODE_DIV, TGSI_OPCODE_DOT2ADD, TGSI_OPCODE_DP2, TGSI_OPCODE_DP3, TGSI_OPCODE_DP4, TGSI_OPCODE_DPH, TGSI_OPCODE_DST, TGSI_OPCODE_ELSE, TGSI_OPCODE_EMIT, TGSI_OPCODE_END, TGSI_OPCODE_ENDIF, TGSI_OPCODE_ENDLOOP, TGSI_OPCODE_ENDLOOP2, TGSI_OPCODE_ENDPRIM, TGSI_OPCODE_ENDREP, TGSI_OPCODE_ENDSUB, TGSI_OPCODE_EXP, 
TGSI_OPCODE_EXPBASE2, TGSI_OPCODE_FLOOR, TGSI_OPCODE_FRAC, TGSI_OPCODE_I2F, TGSI_OPCODE_IF, TGSI_OPCODE_INDEX, TGSI_OPCODE_KIL, TGSI_OPCODE_KILP, TGSI_OPCODE_LERP, TGSI_OPCODE_LIT, TGSI_OPCODE_LOG, TGSI_OPCODE_LOGBASE2, TGSI_OPCODE_LOOP, TGSI_OPCODE_MAD, TGSI_OPCODE_MAX, TGSI_OPCODE_MIN, TGSI_OPCODE_MOD, TGSI_OPCODE_MOV, TGSI_OPCODE_MUL, TGSI_OPCODE_MULTIPLYMATRIX, TGSI_OPCODE_NEGATE, TGSI_OPCODE_NOISE1, TGSI_OPCODE_NOISE2, TGSI_OPCODE_NOISE3, TGSI_OPCODE_NOISE4, TGSI_OPCODE_NOP, TGSI_OPCODE_NOT, TGSI_OPCODE_NRM, TGSI_OPCODE_OR, TGSI_OPCODE_PK2H, TGSI_OPCODE_PK2US, TGSI_OPCODE_PK4B, TGSI_OPCODE_PK4UB, TGSI_OPCODE_POPA, TGSI_OPCODE_POWER, TGSI_OPCODE_PUSHA, TGSI_OPCODE_RCC, TGSI_OPCODE_RCP, TGSI_OPCODE_REP, TGSI_OPCODE_RET, TGSI_OPCODE_RFL, TGSI_OPCODE_ROUND, TGSI_OPCODE_RSQ, TGSI_OPCODE_SAD, TGSI_OPCODE_SCS, TGSI_OPCODE_SEQ, TGSI_OPCODE_SFL, TGSI_OPCODE_SGE, TGSI_OPCODE_SGT, TGSI_OPCODE_SHL, TGSI_OPCODE_SHR, TGSI_OPCODE_SIN, TGSI_OPCODE_SLE, TGSI_OPCODE_SLT, TGSI_OPCODE_SNE, TGSI_OPCODE_SSG, TGSI_OPCODE_STR, TGSI_OPCODE_SUB, TGSI_OPCODE_SWZ, TGSI_OPCODE_TEX, TGSI_OPCODE_TRUNC, TGSI_OPCODE_TXB, TGSI_OPCODE_TXD, TGSI_OPCODE_TXF, TGSI_OPCODE_TXL, TGSI_OPCODE_TXP, TGSI_OPCODE_TXQ, TGSI_OPCODE_UP2H, TGSI_OPCODE_UP2US, TGSI_OPCODE_UP4B, TGSI_OPCODE_UP4UB, TGSI_OPCODE_X2D, TGSI_OPCODE_XOR, TRUE, spu_exec_channel::u, and UPDATE_EXEC_MASK.
00896 { 00897 uint chan_index; 00898 union spu_exec_channel r[8]; 00899 00900 (*pc)++; 00901 00902 switch (inst->Instruction.Opcode) { 00903 case TGSI_OPCODE_ARL: 00904 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 00905 FETCH( &r[0], 0, chan_index ); 00906 r[0].q = si_cflts(r[0].q, 0); 00907 STORE( &r[0], 0, chan_index ); 00908 } 00909 break; 00910 00911 case TGSI_OPCODE_MOV: 00912 case TGSI_OPCODE_SWZ: 00913 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 00914 FETCH( &r[0], 0, chan_index ); 00915 STORE( &r[0], 0, chan_index ); 00916 } 00917 break; 00918 00919 case TGSI_OPCODE_LIT: 00920 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { 00921 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X ); 00922 } 00923 00924 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y ) || IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { 00925 FETCH( &r[0], 0, CHAN_X ); 00926 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { 00927 r[0].q = micro_max(r[0].q, mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].q); 00928 STORE( &r[0], 0, CHAN_Y ); 00929 } 00930 00931 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { 00932 FETCH( &r[1], 0, CHAN_Y ); 00933 r[1].q = micro_max(r[1].q, mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].q); 00934 00935 FETCH( &r[2], 0, CHAN_W ); 00936 r[2].q = micro_min(r[2].q, mach->Temps[TEMP_128_I].xyzw[TEMP_128_C].q); 00937 r[2].q = micro_max(r[2].q, mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C].q); 00938 r[1].q = micro_pow(r[1].q, r[2].q); 00939 00940 /* r0 = (r0 > 0.0) ? 
r1 : 0.0 00941 */ 00942 r[0].q = si_fcgt(r[0].q, mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].q); 00943 r[0].q = si_selb(mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].q, r[1].q, 00944 r[0].q); 00945 STORE( &r[0], 0, CHAN_Z ); 00946 } 00947 } 00948 00949 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { 00950 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); 00951 } 00952 break; 00953 00954 case TGSI_OPCODE_RCP: 00955 /* TGSI_OPCODE_RECIP */ 00956 FETCH( &r[0], 0, CHAN_X ); 00957 r[0].q = micro_div(mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].q, r[0].q); 00958 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 00959 STORE( &r[0], 0, chan_index ); 00960 } 00961 break; 00962 00963 case TGSI_OPCODE_RSQ: 00964 /* TGSI_OPCODE_RECIPSQRT */ 00965 FETCH( &r[0], 0, CHAN_X ); 00966 r[0].q = micro_sqrt(r[0].q); 00967 r[0].q = micro_div(mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].q, r[0].q); 00968 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 00969 STORE( &r[0], 0, chan_index ); 00970 } 00971 break; 00972 00973 case TGSI_OPCODE_EXP: 00974 ASSERT (0); 00975 break; 00976 00977 case TGSI_OPCODE_LOG: 00978 ASSERT (0); 00979 break; 00980 00981 case TGSI_OPCODE_MUL: 00982 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) 00983 { 00984 FETCH(&r[0], 0, chan_index); 00985 FETCH(&r[1], 1, chan_index); 00986 00987 r[0].q = si_fm(r[0].q, r[1].q); 00988 00989 STORE(&r[0], 0, chan_index); 00990 } 00991 break; 00992 00993 case TGSI_OPCODE_ADD: 00994 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 00995 FETCH( &r[0], 0, chan_index ); 00996 FETCH( &r[1], 1, chan_index ); 00997 r[0].q = si_fa(r[0].q, r[1].q); 00998 STORE( &r[0], 0, chan_index ); 00999 } 01000 break; 01001 01002 case TGSI_OPCODE_DP3: 01003 /* TGSI_OPCODE_DOT3 */ 01004 FETCH( &r[0], 0, CHAN_X ); 01005 FETCH( &r[1], 1, CHAN_X ); 01006 r[0].q = si_fm(r[0].q, r[1].q); 01007 01008 FETCH( &r[1], 0, CHAN_Y ); 01009 FETCH( &r[2], 1, CHAN_Y ); 01010 r[0].q = si_fma(r[1].q, r[2].q, r[0].q); 01011 01012 01013 FETCH( &r[1], 0, CHAN_Z ); 01014 FETCH( &r[2], 1, CHAN_Z ); 01015 
r[0].q = si_fma(r[1].q, r[2].q, r[0].q); 01016 01017 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 01018 STORE( &r[0], 0, chan_index ); 01019 } 01020 break; 01021 01022 case TGSI_OPCODE_DP4: 01023 /* TGSI_OPCODE_DOT4 */ 01024 FETCH(&r[0], 0, CHAN_X); 01025 FETCH(&r[1], 1, CHAN_X); 01026 01027 r[0].q = si_fm(r[0].q, r[1].q); 01028 01029 FETCH(&r[1], 0, CHAN_Y); 01030 FETCH(&r[2], 1, CHAN_Y); 01031 01032 r[0].q = si_fma(r[1].q, r[2].q, r[0].q); 01033 01034 FETCH(&r[1], 0, CHAN_Z); 01035 FETCH(&r[2], 1, CHAN_Z); 01036 01037 r[0].q = si_fma(r[1].q, r[2].q, r[0].q); 01038 01039 FETCH(&r[1], 0, CHAN_W); 01040 FETCH(&r[2], 1, CHAN_W); 01041 01042 r[0].q = si_fma(r[1].q, r[2].q, r[0].q); 01043 01044 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 01045 STORE( &r[0], 0, chan_index ); 01046 } 01047 break; 01048 01049 case TGSI_OPCODE_DST: 01050 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { 01051 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X ); 01052 } 01053 01054 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { 01055 FETCH( &r[0], 0, CHAN_Y ); 01056 FETCH( &r[1], 1, CHAN_Y); 01057 r[0].q = si_fm(r[0].q, r[1].q); 01058 STORE( &r[0], 0, CHAN_Y ); 01059 } 01060 01061 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { 01062 FETCH( &r[0], 0, CHAN_Z ); 01063 STORE( &r[0], 0, CHAN_Z ); 01064 } 01065 01066 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { 01067 FETCH( &r[0], 1, CHAN_W ); 01068 STORE( &r[0], 0, CHAN_W ); 01069 } 01070 break; 01071 01072 case TGSI_OPCODE_MIN: 01073 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 01074 FETCH(&r[0], 0, chan_index); 01075 FETCH(&r[1], 1, chan_index); 01076 01077 r[0].q = micro_min(r[0].q, r[1].q); 01078 01079 STORE(&r[0], 0, chan_index); 01080 } 01081 break; 01082 01083 case TGSI_OPCODE_MAX: 01084 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 01085 FETCH(&r[0], 0, chan_index); 01086 FETCH(&r[1], 1, chan_index); 01087 01088 r[0].q = micro_max(r[0].q, r[1].q); 01089 01090 STORE(&r[0], 0, chan_index ); 01091 } 01092 break; 01093 01094 case 
TGSI_OPCODE_SLT: 01095 /* TGSI_OPCODE_SETLT */ 01096 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 01097 FETCH( &r[0], 0, chan_index ); 01098 FETCH( &r[1], 1, chan_index ); 01099 01100 r[0].q = micro_ge(r[0].q, r[1].q); 01101 r[0].q = si_xori(r[0].q, 0xff); 01102 01103 STORE( &r[0], 0, chan_index ); 01104 } 01105 break; 01106 01107 case TGSI_OPCODE_SGE: 01108 /* TGSI_OPCODE_SETGE */ 01109 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 01110 FETCH( &r[0], 0, chan_index ); 01111 FETCH( &r[1], 1, chan_index ); 01112 r[0].q = micro_ge(r[0].q, r[1].q); 01113 STORE( &r[0], 0, chan_index ); 01114 } 01115 break; 01116 01117 case TGSI_OPCODE_MAD: 01118 /* TGSI_OPCODE_MADD */ 01119 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 01120 FETCH( &r[0], 0, chan_index ); 01121 FETCH( &r[1], 1, chan_index ); 01122 FETCH( &r[2], 2, chan_index ); 01123 r[0].q = si_fma(r[0].q, r[1].q, r[2].q); 01124 STORE( &r[0], 0, chan_index ); 01125 } 01126 break; 01127 01128 case TGSI_OPCODE_SUB: 01129 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 01130 FETCH(&r[0], 0, chan_index); 01131 FETCH(&r[1], 1, chan_index); 01132 01133 r[0].q = si_fs(r[0].q, r[1].q); 01134 01135 STORE(&r[0], 0, chan_index); 01136 } 01137 break; 01138 01139 case TGSI_OPCODE_LERP: 01140 /* TGSI_OPCODE_LRP */ 01141 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 01142 FETCH(&r[0], 0, chan_index); 01143 FETCH(&r[1], 1, chan_index); 01144 FETCH(&r[2], 2, chan_index); 01145 01146 r[1].q = si_fs(r[1].q, r[2].q); 01147 r[0].q = si_fma(r[0].q, r[1].q, r[2].q); 01148 01149 STORE(&r[0], 0, chan_index); 01150 } 01151 break; 01152 01153 case TGSI_OPCODE_CND: 01154 ASSERT (0); 01155 break; 01156 01157 case TGSI_OPCODE_CND0: 01158 ASSERT (0); 01159 break; 01160 01161 case TGSI_OPCODE_DOT2ADD: 01162 /* TGSI_OPCODE_DP2A */ 01163 ASSERT (0); 01164 break; 01165 01166 case TGSI_OPCODE_INDEX: 01167 ASSERT (0); 01168 break; 01169 01170 case TGSI_OPCODE_NEGATE: 01171 ASSERT (0); 01172 break; 01173 01174 case TGSI_OPCODE_FRAC: 01175 /* 
TGSI_OPCODE_FRC */ 01176 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 01177 FETCH( &r[0], 0, chan_index ); 01178 r[0].q = micro_frc(r[0].q); 01179 STORE( &r[0], 0, chan_index ); 01180 } 01181 break; 01182 01183 case TGSI_OPCODE_CLAMP: 01184 ASSERT (0); 01185 break; 01186 01187 case TGSI_OPCODE_FLOOR: 01188 /* TGSI_OPCODE_FLR */ 01189 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 01190 FETCH( &r[0], 0, chan_index ); 01191 r[0].q = micro_flr(r[0].q); 01192 STORE( &r[0], 0, chan_index ); 01193 } 01194 break; 01195 01196 case TGSI_OPCODE_ROUND: 01197 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 01198 FETCH( &r[0], 0, chan_index ); 01199 r[0].q = micro_rnd(r[0].q); 01200 STORE( &r[0], 0, chan_index ); 01201 } 01202 break; 01203 01204 case TGSI_OPCODE_EXPBASE2: 01205 /* TGSI_OPCODE_EX2 */ 01206 FETCH(&r[0], 0, CHAN_X); 01207 01208 r[0].q = micro_pow(mach->Temps[TEMP_2_I].xyzw[TEMP_2_C].q, r[0].q); 01209 01210 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 01211 STORE( &r[0], 0, chan_index ); 01212 } 01213 break; 01214 01215 case TGSI_OPCODE_LOGBASE2: 01216 /* TGSI_OPCODE_LG2 */ 01217 FETCH( &r[0], 0, CHAN_X ); 01218 r[0].q = micro_lg2(r[0].q); 01219 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 01220 STORE( &r[0], 0, chan_index ); 01221 } 01222 break; 01223 01224 case TGSI_OPCODE_POWER: 01225 /* TGSI_OPCODE_POW */ 01226 FETCH(&r[0], 0, CHAN_X); 01227 FETCH(&r[1], 1, CHAN_X); 01228 01229 r[0].q = micro_pow(r[0].q, r[1].q); 01230 01231 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 01232 STORE( &r[0], 0, chan_index ); 01233 } 01234 break; 01235 01236 case TGSI_OPCODE_CROSSPRODUCT: 01237 /* TGSI_OPCODE_XPD */ 01238 FETCH(&r[0], 0, CHAN_Y); 01239 FETCH(&r[1], 1, CHAN_Z); 01240 FETCH(&r[3], 0, CHAN_Z); 01241 FETCH(&r[4], 1, CHAN_Y); 01242 01243 /* r2 = (r0 * r1) - (r3 * r5) 01244 */ 01245 r[2].q = si_fm(r[3].q, r[5].q); 01246 r[2].q = si_fms(r[0].q, r[1].q, r[2].q); 01247 01248 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { 01249 STORE( &r[2], 0, CHAN_X ); 01250 } 
01251 01252 FETCH(&r[2], 1, CHAN_X); 01253 FETCH(&r[5], 0, CHAN_X); 01254 01255 /* r3 = (r3 * r2) - (r1 * r5) 01256 */ 01257 r[1].q = si_fm(r[1].q, r[5].q); 01258 r[3].q = si_fms(r[3].q, r[2].q, r[1].q); 01259 01260 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { 01261 STORE( &r[3], 0, CHAN_Y ); 01262 } 01263 01264 /* r5 = (r5 * r4) - (r0 * r2) 01265 */ 01266 r[0].q = si_fm(r[0].q, r[2].q); 01267 r[5].q = si_fms(r[5].q, r[4].q, r[0].q); 01268 01269 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { 01270 STORE( &r[5], 0, CHAN_Z ); 01271 } 01272 01273 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { 01274 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); 01275 } 01276 break; 01277 01278 case TGSI_OPCODE_MULTIPLYMATRIX: 01279 ASSERT (0); 01280 break; 01281 01282 case TGSI_OPCODE_ABS: 01283 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 01284 FETCH(&r[0], 0, chan_index); 01285 01286 r[0].q = micro_abs(r[0].q); 01287 01288 STORE(&r[0], 0, chan_index); 01289 } 01290 break; 01291 01292 case TGSI_OPCODE_RCC: 01293 ASSERT (0); 01294 break; 01295 01296 case TGSI_OPCODE_DPH: 01297 FETCH(&r[0], 0, CHAN_X); 01298 FETCH(&r[1], 1, CHAN_X); 01299 01300 r[0].q = si_fm(r[0].q, r[1].q); 01301 01302 FETCH(&r[1], 0, CHAN_Y); 01303 FETCH(&r[2], 1, CHAN_Y); 01304 01305 r[0].q = si_fma(r[1].q, r[2].q, r[0].q); 01306 01307 FETCH(&r[1], 0, CHAN_Z); 01308 FETCH(&r[2], 1, CHAN_Z); 01309 01310 r[0].q = si_fma(r[1].q, r[2].q, r[0].q); 01311 01312 FETCH(&r[1], 1, CHAN_W); 01313 01314 r[0].q = si_fa(r[0].q, r[1].q); 01315 01316 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 01317 STORE( &r[0], 0, chan_index ); 01318 } 01319 break; 01320 01321 case TGSI_OPCODE_COS: 01322 FETCH(&r[0], 0, CHAN_X); 01323 01324 r[0].q = micro_cos(r[0].q); 01325 01326 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 01327 STORE( &r[0], 0, chan_index ); 01328 } 01329 break; 01330 01331 case TGSI_OPCODE_DDX: 01332 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 01333 FETCH( &r[0], 0, chan_index ); 01334 r[0].q = 
micro_ddx(r[0].q); 01335 STORE( &r[0], 0, chan_index ); 01336 } 01337 break; 01338 01339 case TGSI_OPCODE_DDY: 01340 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 01341 FETCH( &r[0], 0, chan_index ); 01342 r[0].q = micro_ddy(r[0].q); 01343 STORE( &r[0], 0, chan_index ); 01344 } 01345 break; 01346 01347 case TGSI_OPCODE_KILP: 01348 exec_kilp (mach, inst); 01349 break; 01350 01351 case TGSI_OPCODE_KIL: 01352 exec_kil (mach, inst); 01353 break; 01354 01355 case TGSI_OPCODE_PK2H: 01356 ASSERT (0); 01357 break; 01358 01359 case TGSI_OPCODE_PK2US: 01360 ASSERT (0); 01361 break; 01362 01363 case TGSI_OPCODE_PK4B: 01364 ASSERT (0); 01365 break; 01366 01367 case TGSI_OPCODE_PK4UB: 01368 ASSERT (0); 01369 break; 01370 01371 case TGSI_OPCODE_RFL: 01372 ASSERT (0); 01373 break; 01374 01375 case TGSI_OPCODE_SEQ: 01376 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 01377 FETCH( &r[0], 0, chan_index ); 01378 FETCH( &r[1], 1, chan_index ); 01379 01380 r[0].q = si_fceq(r[0].q, r[1].q); 01381 01382 STORE( &r[0], 0, chan_index ); 01383 } 01384 break; 01385 01386 case TGSI_OPCODE_SFL: 01387 ASSERT (0); 01388 break; 01389 01390 case TGSI_OPCODE_SGT: 01391 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 01392 FETCH( &r[0], 0, chan_index ); 01393 FETCH( &r[1], 1, chan_index ); 01394 r[0].q = si_fcgt(r[0].q, r[1].q); 01395 STORE( &r[0], 0, chan_index ); 01396 } 01397 break; 01398 01399 case TGSI_OPCODE_SIN: 01400 FETCH( &r[0], 0, CHAN_X ); 01401 r[0].q = micro_sin(r[0].q); 01402 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 01403 STORE( &r[0], 0, chan_index ); 01404 } 01405 break; 01406 01407 case TGSI_OPCODE_SLE: 01408 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 01409 FETCH( &r[0], 0, chan_index ); 01410 FETCH( &r[1], 1, chan_index ); 01411 01412 r[0].q = si_fcgt(r[0].q, r[1].q); 01413 r[0].q = si_xori(r[0].q, 0xff); 01414 01415 STORE( &r[0], 0, chan_index ); 01416 } 01417 break; 01418 01419 case TGSI_OPCODE_SNE: 01420 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 01421 FETCH( 
&r[0], 0, chan_index ); 01422 FETCH( &r[1], 1, chan_index ); 01423 01424 r[0].q = si_fceq(r[0].q, r[1].q); 01425 r[0].q = si_xori(r[0].q, 0xff); 01426 01427 STORE( &r[0], 0, chan_index ); 01428 } 01429 break; 01430 01431 case TGSI_OPCODE_STR: 01432 ASSERT (0); 01433 break; 01434 01435 case TGSI_OPCODE_TEX: 01436 /* simple texture lookup */ 01437 /* src[0] = texcoord */ 01438 /* src[1] = sampler unit */ 01439 exec_tex(mach, inst, FALSE, FALSE); 01440 break; 01441 01442 case TGSI_OPCODE_TXB: 01443 /* Texture lookup with lod bias */ 01444 /* src[0] = texcoord (src[0].w = load bias) */ 01445 /* src[1] = sampler unit */ 01446 exec_tex(mach, inst, TRUE, FALSE); 01447 break; 01448 01449 case TGSI_OPCODE_TXD: 01450 /* Texture lookup with explict partial derivatives */ 01451 /* src[0] = texcoord */ 01452 /* src[1] = d[strq]/dx */ 01453 /* src[2] = d[strq]/dy */ 01454 /* src[3] = sampler unit */ 01455 ASSERT (0); 01456 break; 01457 01458 case TGSI_OPCODE_TXL: 01459 /* Texture lookup with explit LOD */ 01460 /* src[0] = texcoord (src[0].w = load bias) */ 01461 /* src[1] = sampler unit */ 01462 exec_tex(mach, inst, TRUE, FALSE); 01463 break; 01464 01465 case TGSI_OPCODE_TXP: 01466 /* Texture lookup with projection */ 01467 /* src[0] = texcoord (src[0].w = projection) */ 01468 /* src[1] = sampler unit */ 01469 exec_tex(mach, inst, TRUE, TRUE); 01470 break; 01471 01472 case TGSI_OPCODE_UP2H: 01473 ASSERT (0); 01474 break; 01475 01476 case TGSI_OPCODE_UP2US: 01477 ASSERT (0); 01478 break; 01479 01480 case TGSI_OPCODE_UP4B: 01481 ASSERT (0); 01482 break; 01483 01484 case TGSI_OPCODE_UP4UB: 01485 ASSERT (0); 01486 break; 01487 01488 case TGSI_OPCODE_X2D: 01489 ASSERT (0); 01490 break; 01491 01492 case TGSI_OPCODE_ARA: 01493 ASSERT (0); 01494 break; 01495 01496 case TGSI_OPCODE_ARR: 01497 ASSERT (0); 01498 break; 01499 01500 case TGSI_OPCODE_BRA: 01501 ASSERT (0); 01502 break; 01503 01504 case TGSI_OPCODE_CAL: 01505 /* skip the call if no execution channels are enabled */ 01506 if 
(mach->ExecMask) { 01507 /* do the call */ 01508 01509 /* push the Cond, Loop, Cont stacks */ 01510 ASSERT(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING); 01511 mach->CondStack[mach->CondStackTop++] = mach->CondMask; 01512 ASSERT(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING); 01513 mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask; 01514 ASSERT(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING); 01515 mach->ContStack[mach->ContStackTop++] = mach->ContMask; 01516 01517 ASSERT(mach->FuncStackTop < TGSI_EXEC_MAX_CALL_NESTING); 01518 mach->FuncStack[mach->FuncStackTop++] = mach->FuncMask; 01519 01520 /* note that PC was already incremented above */ 01521 mach->CallStack[mach->CallStackTop++] = *pc; 01522 *pc = inst->InstructionExtLabel.Label; 01523 } 01524 break; 01525 01526 case TGSI_OPCODE_RET: 01527 mach->FuncMask &= ~mach->ExecMask; 01528 UPDATE_EXEC_MASK(mach); 01529 01530 if (mach->ExecMask == 0x0) { 01531 /* really return now (otherwise, keep executing */ 01532 01533 if (mach->CallStackTop == 0) { 01534 /* returning from main() */ 01535 *pc = -1; 01536 return; 01537 } 01538 *pc = mach->CallStack[--mach->CallStackTop]; 01539 01540 /* pop the Cond, Loop, Cont stacks */ 01541 ASSERT(mach->CondStackTop > 0); 01542 mach->CondMask = mach->CondStack[--mach->CondStackTop]; 01543 ASSERT(mach->LoopStackTop > 0); 01544 mach->LoopMask = mach->LoopStack[--mach->LoopStackTop]; 01545 ASSERT(mach->ContStackTop > 0); 01546 mach->ContMask = mach->ContStack[--mach->ContStackTop]; 01547 ASSERT(mach->FuncStackTop > 0); 01548 mach->FuncMask = mach->FuncStack[--mach->FuncStackTop]; 01549 01550 UPDATE_EXEC_MASK(mach); 01551 } 01552 break; 01553 01554 case TGSI_OPCODE_SSG: 01555 ASSERT (0); 01556 break; 01557 01558 case TGSI_OPCODE_CMP: 01559 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 01560 FETCH(&r[0], 0, chan_index); 01561 FETCH(&r[1], 1, chan_index); 01562 FETCH(&r[2], 2, chan_index); 01563 01564 /* r0 = (r0 < 0.0) ? 
r1 : r2 01565 */ 01566 r[3].q = si_xor(r[3].q, r[3].q); 01567 r[0].q = micro_lt(r[0].q, r[3].q); 01568 r[0].q = si_selb(r[1].q, r[2].q, r[0].q); 01569 01570 STORE(&r[0], 0, chan_index); 01571 } 01572 break; 01573 01574 case TGSI_OPCODE_SCS: 01575 if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) || IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) { 01576 FETCH( &r[0], 0, CHAN_X ); 01577 } 01578 if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) ) { 01579 r[1].q = micro_cos(r[0].q); 01580 STORE( &r[1], 0, CHAN_X ); 01581 } 01582 if( IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) { 01583 r[1].q = micro_sin(r[0].q); 01584 STORE( &r[1], 0, CHAN_Y ); 01585 } 01586 if( IS_CHANNEL_ENABLED( *inst, CHAN_Z ) ) { 01587 STORE( &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], 0, CHAN_Z ); 01588 } 01589 if( IS_CHANNEL_ENABLED( *inst, CHAN_W ) ) { 01590 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); 01591 } 01592 break; 01593 01594 case TGSI_OPCODE_NRM: 01595 ASSERT (0); 01596 break; 01597 01598 case TGSI_OPCODE_DIV: 01599 ASSERT( 0 ); 01600 break; 01601 01602 case TGSI_OPCODE_DP2: 01603 FETCH( &r[0], 0, CHAN_X ); 01604 FETCH( &r[1], 1, CHAN_X ); 01605 r[0].q = si_fm(r[0].q, r[1].q); 01606 01607 FETCH( &r[1], 0, CHAN_Y ); 01608 FETCH( &r[2], 1, CHAN_Y ); 01609 r[0].q = si_fma(r[1].q, r[2].q, r[0].q); 01610 01611 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 01612 STORE( &r[0], 0, chan_index ); 01613 } 01614 break; 01615 01616 case TGSI_OPCODE_IF: 01617 /* push CondMask */ 01618 ASSERT(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING); 01619 mach->CondStack[mach->CondStackTop++] = mach->CondMask; 01620 FETCH( &r[0], 0, CHAN_X ); 01621 /* update CondMask */ 01622 if( ! r[0].u[0] ) { 01623 mach->CondMask &= ~0x1; 01624 } 01625 if( ! r[0].u[1] ) { 01626 mach->CondMask &= ~0x2; 01627 } 01628 if( ! r[0].u[2] ) { 01629 mach->CondMask &= ~0x4; 01630 } 01631 if( ! 
r[0].u[3] ) { 01632 mach->CondMask &= ~0x8; 01633 } 01634 UPDATE_EXEC_MASK(mach); 01635 /* Todo: If CondMask==0, jump to ELSE */ 01636 break; 01637 01638 case TGSI_OPCODE_ELSE: 01639 /* invert CondMask wrt previous mask */ 01640 { 01641 uint prevMask; 01642 ASSERT(mach->CondStackTop > 0); 01643 prevMask = mach->CondStack[mach->CondStackTop - 1]; 01644 mach->CondMask = ~mach->CondMask & prevMask; 01645 UPDATE_EXEC_MASK(mach); 01646 /* Todo: If CondMask==0, jump to ENDIF */ 01647 } 01648 break; 01649 01650 case TGSI_OPCODE_ENDIF: 01651 /* pop CondMask */ 01652 ASSERT(mach->CondStackTop > 0); 01653 mach->CondMask = mach->CondStack[--mach->CondStackTop]; 01654 UPDATE_EXEC_MASK(mach); 01655 break; 01656 01657 case TGSI_OPCODE_END: 01658 /* halt execution */ 01659 *pc = -1; 01660 break; 01661 01662 case TGSI_OPCODE_REP: 01663 ASSERT (0); 01664 break; 01665 01666 case TGSI_OPCODE_ENDREP: 01667 ASSERT (0); 01668 break; 01669 01670 case TGSI_OPCODE_PUSHA: 01671 ASSERT (0); 01672 break; 01673 01674 case TGSI_OPCODE_POPA: 01675 ASSERT (0); 01676 break; 01677 01678 case TGSI_OPCODE_CEIL: 01679 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 01680 FETCH( &r[0], 0, chan_index ); 01681 r[0].q = micro_ceil(r[0].q); 01682 STORE( &r[0], 0, chan_index ); 01683 } 01684 break; 01685 01686 case TGSI_OPCODE_I2F: 01687 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 01688 FETCH( &r[0], 0, chan_index ); 01689 r[0].q = si_csflt(r[0].q, 0); 01690 STORE( &r[0], 0, chan_index ); 01691 } 01692 break; 01693 01694 case TGSI_OPCODE_NOT: 01695 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 01696 FETCH( &r[0], 0, chan_index ); 01697 r[0].q = si_xorbi(r[0].q, 0xff); 01698 STORE( &r[0], 0, chan_index ); 01699 } 01700 break; 01701 01702 case TGSI_OPCODE_TRUNC: 01703 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 01704 FETCH( &r[0], 0, chan_index ); 01705 r[0].q = micro_trunc(r[0].q); 01706 STORE( &r[0], 0, chan_index ); 01707 } 01708 break; 01709 01710 case TGSI_OPCODE_SHL: 01711 
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 01712 FETCH( &r[0], 0, chan_index ); 01713 FETCH( &r[1], 1, chan_index ); 01714 01715 r[0].q = si_shl(r[0].q, r[1].q); 01716 01717 STORE( &r[0], 0, chan_index ); 01718 } 01719 break; 01720 01721 case TGSI_OPCODE_SHR: 01722 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 01723 FETCH( &r[0], 0, chan_index ); 01724 FETCH( &r[1], 1, chan_index ); 01725 r[0].q = micro_ishr(r[0].q, r[1].q); 01726 STORE( &r[0], 0, chan_index ); 01727 } 01728 break; 01729 01730 case TGSI_OPCODE_AND: 01731 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 01732 FETCH( &r[0], 0, chan_index ); 01733 FETCH( &r[1], 1, chan_index ); 01734 r[0].q = si_and(r[0].q, r[1].q); 01735 STORE( &r[0], 0, chan_index ); 01736 } 01737 break; 01738 01739 case TGSI_OPCODE_OR: 01740 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 01741 FETCH( &r[0], 0, chan_index ); 01742 FETCH( &r[1], 1, chan_index ); 01743 r[0].q = si_or(r[0].q, r[1].q); 01744 STORE( &r[0], 0, chan_index ); 01745 } 01746 break; 01747 01748 case TGSI_OPCODE_MOD: 01749 ASSERT (0); 01750 break; 01751 01752 case TGSI_OPCODE_XOR: 01753 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 01754 FETCH( &r[0], 0, chan_index ); 01755 FETCH( &r[1], 1, chan_index ); 01756 r[0].q = si_xor(r[0].q, r[1].q); 01757 STORE( &r[0], 0, chan_index ); 01758 } 01759 break; 01760 01761 case TGSI_OPCODE_SAD: 01762 ASSERT (0); 01763 break; 01764 01765 case TGSI_OPCODE_TXF: 01766 ASSERT (0); 01767 break; 01768 01769 case TGSI_OPCODE_TXQ: 01770 ASSERT (0); 01771 break; 01772 01773 case TGSI_OPCODE_EMIT: 01774 mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] += 16; 01775 mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]]++; 01776 break; 01777 01778 case TGSI_OPCODE_ENDPRIM: 01779 mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]++; 01780 mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]] = 0; 01781 break; 01782 01783 case TGSI_OPCODE_LOOP: 01784 /* fall-through (for now) 
*/ 01785 case TGSI_OPCODE_BGNLOOP2: 01786 /* push LoopMask and ContMasks */ 01787 ASSERT(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING); 01788 mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask; 01789 ASSERT(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING); 01790 mach->ContStack[mach->ContStackTop++] = mach->ContMask; 01791 break; 01792 01793 case TGSI_OPCODE_ENDLOOP: 01794 /* fall-through (for now at least) */ 01795 case TGSI_OPCODE_ENDLOOP2: 01796 /* Restore ContMask, but don't pop */ 01797 ASSERT(mach->ContStackTop > 0); 01798 mach->ContMask = mach->ContStack[mach->ContStackTop - 1]; 01799 if (mach->LoopMask) { 01800 /* repeat loop: jump to instruction just past BGNLOOP */ 01801 *pc = inst->InstructionExtLabel.Label + 1; 01802 } 01803 else { 01804 /* exit loop: pop LoopMask */ 01805 ASSERT(mach->LoopStackTop > 0); 01806 mach->LoopMask = mach->LoopStack[--mach->LoopStackTop]; 01807 /* pop ContMask */ 01808 ASSERT(mach->ContStackTop > 0); 01809 mach->ContMask = mach->ContStack[--mach->ContStackTop]; 01810 } 01811 UPDATE_EXEC_MASK(mach); 01812 break; 01813 01814 case TGSI_OPCODE_BRK: 01815 /* turn off loop channels for each enabled exec channel */ 01816 mach->LoopMask &= ~mach->ExecMask; 01817 /* Todo: if mach->LoopMask == 0, jump to end of loop */ 01818 UPDATE_EXEC_MASK(mach); 01819 break; 01820 01821 case TGSI_OPCODE_CONT: 01822 /* turn off cont channels for each enabled exec channel */ 01823 mach->ContMask &= ~mach->ExecMask; 01824 /* Todo: if mach->LoopMask == 0, jump to end of loop */ 01825 UPDATE_EXEC_MASK(mach); 01826 break; 01827 01828 case TGSI_OPCODE_BGNSUB: 01829 /* no-op */ 01830 break; 01831 01832 case TGSI_OPCODE_ENDSUB: 01833 /* no-op */ 01834 break; 01835 01836 case TGSI_OPCODE_NOISE1: 01837 ASSERT( 0 ); 01838 break; 01839 01840 case TGSI_OPCODE_NOISE2: 01841 ASSERT( 0 ); 01842 break; 01843 01844 case TGSI_OPCODE_NOISE3: 01845 ASSERT( 0 ); 01846 break; 01847 01848 case TGSI_OPCODE_NOISE4: 01849 ASSERT( 0 ); 01850 break; 01851 01852 case 
TGSI_OPCODE_NOP: 01853 break; 01854 01855 default: 01856 ASSERT( 0 ); 01857 } 01858 }
static void exec_kil | ( | struct spu_exec_machine * | mach, | |
const struct tgsi_full_instruction * | inst | |||
) | [static] |
Execute ARB-style KIL which is predicated by a src register.
Kill fragment if any of the four values is less than zero.
Definition at line 605 of file spu_exec.c.
References spu_exec_channel::f, FETCH, tgsi_full_instruction::FullSrcRegisters, spu_exec_channel::i, swizzle(), TEMP_KILMASK_C, TEMP_KILMASK_I, TGSI_EXTSWIZZLE_ONE, TGSI_EXTSWIZZLE_ZERO, and tgsi_util_get_full_src_register_extswizzle().
00607 { 00608 uint uniquemask; 00609 uint chan_index; 00610 uint kilmask = 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */ 00611 union spu_exec_channel r[1]; 00612 00613 /* This mask stores component bits that were already tested. Note that 00614 * we test if the value is less than zero, so 1.0 and 0.0 need not to be 00615 * tested. */ 00616 uniquemask = (1 << TGSI_EXTSWIZZLE_ZERO) | (1 << TGSI_EXTSWIZZLE_ONE); 00617 00618 for (chan_index = 0; chan_index < 4; chan_index++) 00619 { 00620 uint swizzle; 00621 uint i; 00622 00623 /* unswizzle channel */ 00624 swizzle = tgsi_util_get_full_src_register_extswizzle ( 00625 &inst->FullSrcRegisters[0], 00626 chan_index); 00627 00628 /* check if the component has not been already tested */ 00629 if (uniquemask & (1 << swizzle)) 00630 continue; 00631 uniquemask |= 1 << swizzle; 00632 00633 FETCH(&r[0], 0, chan_index); 00634 for (i = 0; i < 4; i++) 00635 if (r[0].f[i] < 0.0f) 00636 kilmask |= 1 << i; 00637 } 00638 00639 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask; 00640 }
static void exec_kilp | ( | struct tgsi_exec_machine * | mach, | |
const struct tgsi_full_instruction * | inst | |||
) | [static] |
Execute NVIDIA-style KIL which is predicated by a condition code.
Kill fragment if the condition code is TRUE.
Definition at line 647 of file spu_exec.c.
References TEMP_KILMASK_C, TEMP_KILMASK_I, tgsi_exec_machine::Temps, tgsi_exec_channel::u, and tgsi_exec_vector::xyzw.
00649 { 00650 uint kilmask = 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */ 00651 00652 /* TODO: build kilmask from CC mask */ 00653 00654 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask; 00655 }
static void exec_tex | ( | struct spu_exec_machine * | mach, | |
const struct tgsi_full_instruction * | inst, | |||
boolean | biasLod, | |||
boolean | projected | |||
) | [static] |
Definition at line 686 of file spu_exec.c.
References ASSERT, CHAN_W, CHAN_X, CHAN_Y, CHAN_Z, spu_exec_channel::f, FETCH, fetch_texel(), FOR_EACH_ENABLED_CHANNEL, tgsi_full_instruction::FullSrcRegisters, tgsi_src_register::Index, tgsi_full_instruction::InstructionExtTexture, micro_div(), spu_exec_channel::q, spu_exec_machine::Samplers, tgsi_full_src_register::SrcRegister, STORE, tgsi_instruction_ext_texture::Texture, TGSI_TEXTURE_1D, TGSI_TEXTURE_2D, TGSI_TEXTURE_3D, TGSI_TEXTURE_CUBE, and TGSI_TEXTURE_RECT.
00689 { 00690 const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index; 00691 union spu_exec_channel r[8]; 00692 uint chan_index; 00693 float lodBias; 00694 00695 /* printf("Sampler %u unit %u\n", sampler, unit); */ 00696 00697 switch (inst->InstructionExtTexture.Texture) { 00698 case TGSI_TEXTURE_1D: 00699 00700 FETCH(&r[0], 0, CHAN_X); 00701 00702 if (projected) { 00703 FETCH(&r[1], 0, CHAN_W); 00704 r[0].q = micro_div(r[0].q, r[1].q); 00705 } 00706 00707 if (biasLod) { 00708 FETCH(&r[1], 0, CHAN_W); 00709 lodBias = r[2].f[0]; 00710 } 00711 else 00712 lodBias = 0.0; 00713 00714 fetch_texel(&mach->Samplers[unit], 00715 &r[0], NULL, NULL, lodBias, /* S, T, P, BIAS */ 00716 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ 00717 break; 00718 00719 case TGSI_TEXTURE_2D: 00720 case TGSI_TEXTURE_RECT: 00721 00722 FETCH(&r[0], 0, CHAN_X); 00723 FETCH(&r[1], 0, CHAN_Y); 00724 FETCH(&r[2], 0, CHAN_Z); 00725 00726 if (projected) { 00727 FETCH(&r[3], 0, CHAN_W); 00728 r[0].q = micro_div(r[0].q, r[3].q); 00729 r[1].q = micro_div(r[1].q, r[3].q); 00730 r[2].q = micro_div(r[2].q, r[3].q); 00731 } 00732 00733 if (biasLod) { 00734 FETCH(&r[3], 0, CHAN_W); 00735 lodBias = r[3].f[0]; 00736 } 00737 else 00738 lodBias = 0.0; 00739 00740 fetch_texel(&mach->Samplers[unit], 00741 &r[0], &r[1], &r[2], lodBias, /* inputs */ 00742 &r[0], &r[1], &r[2], &r[3]); /* outputs */ 00743 break; 00744 00745 case TGSI_TEXTURE_3D: 00746 case TGSI_TEXTURE_CUBE: 00747 00748 FETCH(&r[0], 0, CHAN_X); 00749 FETCH(&r[1], 0, CHAN_Y); 00750 FETCH(&r[2], 0, CHAN_Z); 00751 00752 if (projected) { 00753 FETCH(&r[3], 0, CHAN_W); 00754 r[0].q = micro_div(r[0].q, r[3].q); 00755 r[1].q = micro_div(r[1].q, r[3].q); 00756 r[2].q = micro_div(r[2].q, r[3].q); 00757 } 00758 00759 if (biasLod) { 00760 FETCH(&r[3], 0, CHAN_W); 00761 lodBias = r[3].f[0]; 00762 } 00763 else 00764 lodBias = 0.0; 00765 00766 fetch_texel(&mach->Samplers[unit], 00767 &r[0], &r[1], &r[2], lodBias, 00768 &r[0], &r[1], &r[2], &r[3]); 00769 break; 
00770 00771 default: 00772 ASSERT (0); 00773 } 00774 00775 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 00776 STORE( &r[chan_index], 0, chan_index ); 00777 } 00778 }
static void fetch_source | ( | const struct spu_exec_machine * | mach, | |
union spu_exec_channel * | chan, | |||
const struct tgsi_full_src_register * | reg, | |||
const uint | chan_index | |||
) | [static] |
Definition at line 430 of file spu_exec.c.
References ASSERT, CHAN_X, tgsi_src_register_ext_mod::Complement, tgsi_src_register::Dimension, fetch_src_file_channel(), tgsi_src_register::File, spu_exec_channel::i, tgsi_dimension::Index, tgsi_src_register::Index, tgsi_dimension::Indirect, tgsi_src_register::Indirect, micro_abs(), micro_neg(), micro_set_sign(), spu_exec_channel::q, tgsi_full_src_register::SrcRegister, tgsi_full_src_register::SrcRegisterDim, tgsi_full_src_register::SrcRegisterDimInd, tgsi_full_src_register::SrcRegisterExtMod, tgsi_full_src_register::SrcRegisterInd, swizzle(), TEMP_1_C, TEMP_1_I, TGSI_FILE_CONSTANT, TGSI_FILE_INPUT, tgsi_util_get_full_src_register_extswizzle(), tgsi_util_get_full_src_register_sign_mode(), tgsi_util_get_src_register_swizzle(), TGSI_UTIL_SIGN_CLEAR, TGSI_UTIL_SIGN_KEEP, TGSI_UTIL_SIGN_SET, and TGSI_UTIL_SIGN_TOGGLE.
00435 { 00436 union spu_exec_channel index; 00437 uint swizzle; 00438 00439 index.i[0] = 00440 index.i[1] = 00441 index.i[2] = 00442 index.i[3] = reg->SrcRegister.Index; 00443 00444 if (reg->SrcRegister.Indirect) { 00445 union spu_exec_channel index2; 00446 union spu_exec_channel indir_index; 00447 00448 index2.i[0] = 00449 index2.i[1] = 00450 index2.i[2] = 00451 index2.i[3] = reg->SrcRegisterInd.Index; 00452 00453 swizzle = tgsi_util_get_src_register_swizzle(®->SrcRegisterInd, 00454 CHAN_X); 00455 fetch_src_file_channel( 00456 mach, 00457 reg->SrcRegisterInd.File, 00458 swizzle, 00459 &index2, 00460 &indir_index ); 00461 00462 index.q = si_a(index.q, indir_index.q); 00463 } 00464 00465 if( reg->SrcRegister.Dimension ) { 00466 switch( reg->SrcRegister.File ) { 00467 case TGSI_FILE_INPUT: 00468 index.q = si_mpyi(index.q, 17); 00469 break; 00470 case TGSI_FILE_CONSTANT: 00471 index.q = si_shli(index.q, 12); 00472 break; 00473 default: 00474 ASSERT( 0 ); 00475 } 00476 00477 index.i[0] += reg->SrcRegisterDim.Index; 00478 index.i[1] += reg->SrcRegisterDim.Index; 00479 index.i[2] += reg->SrcRegisterDim.Index; 00480 index.i[3] += reg->SrcRegisterDim.Index; 00481 00482 if (reg->SrcRegisterDim.Indirect) { 00483 union spu_exec_channel index2; 00484 union spu_exec_channel indir_index; 00485 00486 index2.i[0] = 00487 index2.i[1] = 00488 index2.i[2] = 00489 index2.i[3] = reg->SrcRegisterDimInd.Index; 00490 00491 swizzle = tgsi_util_get_src_register_swizzle( ®->SrcRegisterDimInd, CHAN_X ); 00492 fetch_src_file_channel( 00493 mach, 00494 reg->SrcRegisterDimInd.File, 00495 swizzle, 00496 &index2, 00497 &indir_index ); 00498 00499 index.q = si_a(index.q, indir_index.q); 00500 } 00501 } 00502 00503 swizzle = tgsi_util_get_full_src_register_extswizzle( reg, chan_index ); 00504 fetch_src_file_channel( 00505 mach, 00506 reg->SrcRegister.File, 00507 swizzle, 00508 &index, 00509 chan ); 00510 00511 switch (tgsi_util_get_full_src_register_sign_mode( reg, chan_index )) { 00512 case 
TGSI_UTIL_SIGN_CLEAR: 00513 chan->q = micro_abs(chan->q); 00514 break; 00515 00516 case TGSI_UTIL_SIGN_SET: 00517 chan->q = micro_set_sign(chan->q); 00518 break; 00519 00520 case TGSI_UTIL_SIGN_TOGGLE: 00521 chan->q = micro_neg(chan->q); 00522 break; 00523 00524 case TGSI_UTIL_SIGN_KEEP: 00525 break; 00526 } 00527 00528 if (reg->SrcRegisterExtMod.Complement) { 00529 chan->q = si_fs(mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].q, chan->q); 00530 } 00531 }
static void fetch_src_file_channel | ( | const struct spu_exec_machine * | mach, | |
const uint | file, | |||
const uint | swizzle, | |||
const union spu_exec_channel * | index, | |||
union spu_exec_channel * | chan | |||
) | [static] |
Definition at line 341 of file spu_exec.c.
References spu_exec_machine::Addrs, ASSERT, spu_exec_machine::Consts, spu_exec_channel::f, spu_exec_channel::i, spu_exec_machine::ImmLimit, spu_exec_machine::Imms, spu_exec_machine::Inputs, spu_exec_machine::Outputs, spu_dcache_fetch_unaligned(), TEMP_0_C, TEMP_0_I, TEMP_1_C, TEMP_1_I, TGSI_EXTSWIZZLE_ONE, TGSI_EXTSWIZZLE_W, TGSI_EXTSWIZZLE_X, TGSI_EXTSWIZZLE_Y, TGSI_EXTSWIZZLE_Z, TGSI_EXTSWIZZLE_ZERO, TGSI_FILE_ADDRESS, TGSI_FILE_CONSTANT, TGSI_FILE_IMMEDIATE, TGSI_FILE_INPUT, TGSI_FILE_OUTPUT, TGSI_FILE_TEMPORARY, spu_exec_channel::u, and spu_exec_vector::xyzw.
00347 { 00348 switch( swizzle ) { 00349 case TGSI_EXTSWIZZLE_X: 00350 case TGSI_EXTSWIZZLE_Y: 00351 case TGSI_EXTSWIZZLE_Z: 00352 case TGSI_EXTSWIZZLE_W: 00353 switch( file ) { 00354 case TGSI_FILE_CONSTANT: { 00355 unsigned i; 00356 00357 for (i = 0; i < 4; i++) { 00358 const float *ptr = mach->Consts[index->i[i]]; 00359 float tmp[4]; 00360 00361 spu_dcache_fetch_unaligned((qword *) tmp, 00362 (uintptr_t)(ptr + swizzle), 00363 sizeof(float)); 00364 00365 chan->f[i] = tmp[0]; 00366 } 00367 break; 00368 } 00369 00370 case TGSI_FILE_INPUT: 00371 chan->u[0] = mach->Inputs[index->i[0]].xyzw[swizzle].u[0]; 00372 chan->u[1] = mach->Inputs[index->i[1]].xyzw[swizzle].u[1]; 00373 chan->u[2] = mach->Inputs[index->i[2]].xyzw[swizzle].u[2]; 00374 chan->u[3] = mach->Inputs[index->i[3]].xyzw[swizzle].u[3]; 00375 break; 00376 00377 case TGSI_FILE_TEMPORARY: 00378 chan->u[0] = mach->Temps[index->i[0]].xyzw[swizzle].u[0]; 00379 chan->u[1] = mach->Temps[index->i[1]].xyzw[swizzle].u[1]; 00380 chan->u[2] = mach->Temps[index->i[2]].xyzw[swizzle].u[2]; 00381 chan->u[3] = mach->Temps[index->i[3]].xyzw[swizzle].u[3]; 00382 break; 00383 00384 case TGSI_FILE_IMMEDIATE: 00385 ASSERT( index->i[0] < (int) mach->ImmLimit ); 00386 ASSERT( index->i[1] < (int) mach->ImmLimit ); 00387 ASSERT( index->i[2] < (int) mach->ImmLimit ); 00388 ASSERT( index->i[3] < (int) mach->ImmLimit ); 00389 00390 chan->f[0] = mach->Imms[index->i[0]][swizzle]; 00391 chan->f[1] = mach->Imms[index->i[1]][swizzle]; 00392 chan->f[2] = mach->Imms[index->i[2]][swizzle]; 00393 chan->f[3] = mach->Imms[index->i[3]][swizzle]; 00394 break; 00395 00396 case TGSI_FILE_ADDRESS: 00397 chan->u[0] = mach->Addrs[index->i[0]].xyzw[swizzle].u[0]; 00398 chan->u[1] = mach->Addrs[index->i[1]].xyzw[swizzle].u[1]; 00399 chan->u[2] = mach->Addrs[index->i[2]].xyzw[swizzle].u[2]; 00400 chan->u[3] = mach->Addrs[index->i[3]].xyzw[swizzle].u[3]; 00401 break; 00402 00403 case TGSI_FILE_OUTPUT: 00404 /* vertex/fragment output vars can be read too */ 
00405 chan->u[0] = mach->Outputs[index->i[0]].xyzw[swizzle].u[0]; 00406 chan->u[1] = mach->Outputs[index->i[1]].xyzw[swizzle].u[1]; 00407 chan->u[2] = mach->Outputs[index->i[2]].xyzw[swizzle].u[2]; 00408 chan->u[3] = mach->Outputs[index->i[3]].xyzw[swizzle].u[3]; 00409 break; 00410 00411 default: 00412 ASSERT( 0 ); 00413 } 00414 break; 00415 00416 case TGSI_EXTSWIZZLE_ZERO: 00417 *chan = mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]; 00418 break; 00419 00420 case TGSI_EXTSWIZZLE_ONE: 00421 *chan = mach->Temps[TEMP_1_I].xyzw[TEMP_1_C]; 00422 break; 00423 00424 default: 00425 ASSERT( 0 ); 00426 } 00427 }
static void fetch_texel | ( | struct spu_sampler * | sampler, | |
const union spu_exec_channel * | s, | |||
const union spu_exec_channel * | t, | |||
const union spu_exec_channel * | p, | |||
float | lodbias, | |||
union spu_exec_channel * | r, | |||
union spu_exec_channel * | g, | |||
union spu_exec_channel * | b, | |||
union spu_exec_channel * | a | |||
) | [static] |
Definition at line 661 of file spu_exec.c.
References spu_exec_channel::f, spu_sampler::get_samples, and spu_exec_channel::q.
00670 { 00671 qword rgba[4]; 00672 qword out[4]; 00673 00674 sampler->get_samples(sampler, s->f, t->f, p->f, lodbias, 00675 (float (*)[4]) rgba); 00676 00677 _transpose_matrix4x4((vec_float4 *) out, (vec_float4 *) rgba); 00678 r->q = out[0]; 00679 g->q = out[1]; 00680 b->q = out[2]; 00681 a->q = out[3]; 00682 }
static void linear_interpolation | ( | struct spu_exec_machine * | mach, | |
unsigned | attrib, | |||
unsigned | chan | |||
) | [static] |
Definition at line 796 of file spu_exec.c.
References spu_interp_coef::a0, spu_interp_coef::dadx, spu_interp_coef::dady, spu_exec_channel::f, spu_exec_machine::Inputs, spu_exec_machine::InterpCoefs, spu_exec_machine::QuadPos, and spu_exec_vector::xyzw.
00800 { 00801 const float x = mach->QuadPos.xyzw[0].f[0]; 00802 const float y = mach->QuadPos.xyzw[1].f[0]; 00803 const float dadx = mach->InterpCoefs[attrib].dadx[chan]; 00804 const float dady = mach->InterpCoefs[attrib].dady[chan]; 00805 const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y; 00806 mach->Inputs[attrib].xyzw[chan].f[0] = a0; 00807 mach->Inputs[attrib].xyzw[chan].f[1] = a0 + dadx; 00808 mach->Inputs[attrib].xyzw[chan].f[2] = a0 + dady; 00809 mach->Inputs[attrib].xyzw[chan].f[3] = a0 + dadx + dady; 00810 }
static qword micro_abs | ( | qword | src | ) | [static] |
static qword micro_ceil | ( | qword | src | ) | [static] |
static qword micro_cos | ( | qword | src | ) | [static] |
static qword micro_ddx | ( | qword | src | ) | [static] |
static qword micro_ddy | ( | qword | src | ) | [static] |
static qword micro_div | ( | qword | src0, | |
qword | src1 | |||
) | [static] |
static qword micro_flr | ( | qword | src | ) | [static] |
static qword micro_frc | ( | qword | src | ) | [static] |
static qword micro_ge | ( | qword | src0, | |
qword | src1 | |||
) | [static] |
static qword micro_ishr | ( | qword | src0, | |
qword | src1 | |||
) | [static] |
static qword micro_lg2 | ( | qword | src | ) | [static] |
static qword micro_lt | ( | qword | src0, | |
qword | src1 | |||
) | [static] |
Definition at line 269 of file spu_exec.c.
00270 { 00271 const qword tmp = si_or(si_fceq(src0, src1), si_fcgt(src0, src1)); 00272 00273 return si_xori(tmp, 0xff); 00274 }
static qword micro_max | ( | qword | src0, | |
qword | src1 | |||
) | [static] |
static qword micro_min | ( | qword | src0, | |
qword | src1 | |||
) | [static] |
static qword micro_neg | ( | qword | src | ) | [static] |
static qword micro_pow | ( | qword | src0, | |
qword | src1 | |||
) | [static] |
static qword micro_rnd | ( | qword | src | ) | [static] |
Definition at line 307 of file spu_exec.c.
00308 { 00309 const qword half = (qword) spu_splats(0.5f); 00310 00311 /* May be able to use _roundf4. There may be some difference, though. 00312 */ 00313 return (qword) _floorf4((vec_float4) si_fa(src, half)); 00314 }
static qword micro_set_sign | ( | qword | src | ) | [static] |
static qword micro_sin | ( | qword | src | ) | [static] |
static qword micro_sqrt | ( | qword | src | ) | [static] |
static qword micro_trunc | ( | qword | src | ) | [static] |
static void perspective_interpolation | ( | struct spu_exec_machine * | mach, | |
unsigned | attrib, | |||
unsigned | chan | |||
) | [static] |
Definition at line 813 of file spu_exec.c.
References spu_interp_coef::a0, spu_interp_coef::dadx, spu_interp_coef::dady, spu_exec_channel::f, spu_exec_machine::Inputs, spu_exec_machine::InterpCoefs, spu_exec_machine::QuadPos, and spu_exec_vector::xyzw.
00817 { 00818 const float x = mach->QuadPos.xyzw[0].f[0]; 00819 const float y = mach->QuadPos.xyzw[1].f[0]; 00820 const float dadx = mach->InterpCoefs[attrib].dadx[chan]; 00821 const float dady = mach->InterpCoefs[attrib].dady[chan]; 00822 const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y; 00823 const float *w = mach->QuadPos.xyzw[3].f; 00824 /* divide by W here */ 00825 mach->Inputs[attrib].xyzw[chan].f[0] = a0 / w[0]; 00826 mach->Inputs[attrib].xyzw[chan].f[1] = (a0 + dadx) / w[1]; 00827 mach->Inputs[attrib].xyzw[chan].f[2] = (a0 + dady) / w[2]; 00828 mach->Inputs[attrib].xyzw[chan].f[3] = (a0 + dadx + dady) / w[3]; 00829 }
void spu_exec_machine_init | ( | struct spu_exec_machine * | mach, | |
uint | numSamplers, | |||
struct spu_sampler * | samplers, | |||
unsigned | processor | |||
) |
Initialize machine state by expanding tokens to full instructions, allocating temporary storage, setting up constants, etc.
After this, we can call spu_exec_machine_run() many times.
Definition at line 143 of file spu_exec.c.
References spu_exec_machine::Addrs, spu_exec_machine::Processor, spu_exec_machine::Samplers, TEMP_0_C, TEMP_0_I, TEMP_128_C, TEMP_128_I, TEMP_1_C, TEMP_1_I, TEMP_2_C, TEMP_2_I, TEMP_7F_C, TEMP_7F_I, TEMP_80_C, TEMP_80_I, TEMP_FF_C, TEMP_FF_I, TEMP_M128_C, TEMP_M128_I, and TGSI_EXEC_NUM_TEMPS.
00147 { 00148 const qword zero = si_il(0); 00149 const qword not_zero = si_il(~0); 00150 00151 (void) numSamplers; 00152 mach->Samplers = samplers; 00153 mach->Processor = processor; 00154 mach->Addrs = &mach->Temps[TGSI_EXEC_NUM_TEMPS]; 00155 00156 /* Setup constants. */ 00157 mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].q = zero; 00158 mach->Temps[TEMP_FF_I].xyzw[TEMP_FF_C].q = not_zero; 00159 mach->Temps[TEMP_7F_I].xyzw[TEMP_7F_C].q = si_shli(not_zero, -1); 00160 mach->Temps[TEMP_80_I].xyzw[TEMP_80_C].q = si_shli(not_zero, 31); 00161 00162 mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].q = (qword) spu_splats(1.0f); 00163 mach->Temps[TEMP_2_I].xyzw[TEMP_2_C].q = (qword) spu_splats(2.0f); 00164 mach->Temps[TEMP_128_I].xyzw[TEMP_128_C].q = (qword) spu_splats(128.0f); 00165 mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C].q = (qword) spu_splats(-128.0f); 00166 }
uint spu_exec_machine_run | ( | struct spu_exec_machine * | mach | ) |
Run TGSI interpreter.
Definition at line 1866 of file spu_exec.c.
References ALIGN16_ATTRIB, ASSERT, spu_exec_machine::CallStackTop, spu_exec_machine::CondMask, spu_exec_machine::CondStackTop, spu_exec_machine::ContMask, spu_exec_machine::ContStackTop, spu_exec_machine::Declarations, exec_declaration(), exec_instruction(), spu_exec_machine::ExecMask, spu_exec_channel::f, spu_exec_machine::FuncMask, spu_exec_channel::i, spu_exec_machine::Instructions, spu_exec_machine::LoopMask, spu_exec_machine::LoopStackTop, spu_exec_machine::NumDeclarations, spu_exec_machine::Outputs, spu_exec_machine::Primitives, spu_exec_machine::Processor, ROUNDUP16, spu_dcache_fetch_unaligned(), TEMP_KILMASK_C, TEMP_KILMASK_I, TEMP_OUTPUT_C, TEMP_OUTPUT_I, TEMP_PRIMITIVE_C, TEMP_PRIMITIVE_I, TGSI_PROCESSOR_FRAGMENT, TGSI_PROCESSOR_GEOMETRY, and spu_exec_vector::xyzw.
01867 { 01868 uint i; 01869 int pc = 0; 01870 01871 mach->CondMask = 0xf; 01872 mach->LoopMask = 0xf; 01873 mach->ContMask = 0xf; 01874 mach->FuncMask = 0xf; 01875 mach->ExecMask = 0xf; 01876 01877 mach->CondStackTop = 0; /* temporarily subvert this ASSERTion */ 01878 ASSERT(mach->CondStackTop == 0); 01879 ASSERT(mach->LoopStackTop == 0); 01880 ASSERT(mach->ContStackTop == 0); 01881 ASSERT(mach->CallStackTop == 0); 01882 01883 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] = 0; 01884 mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] = 0; 01885 01886 if( mach->Processor == TGSI_PROCESSOR_GEOMETRY ) { 01887 mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0] = 0; 01888 mach->Primitives[0] = 0; 01889 } 01890 01891 01892 /* execute declarations (interpolants) */ 01893 if( mach->Processor == TGSI_PROCESSOR_FRAGMENT ) { 01894 for (i = 0; i < mach->NumDeclarations; i++) { 01895 union { 01896 struct tgsi_full_declaration decl; 01897 qword buffer[ROUNDUP16(sizeof(struct tgsi_full_declaration)) / 16]; 01898 } d ALIGN16_ATTRIB; 01899 unsigned ea = (unsigned) (mach->Declarations + pc); 01900 01901 spu_dcache_fetch_unaligned(d.buffer, ea, sizeof(d.decl)); 01902 01903 exec_declaration( mach, &d.decl ); 01904 } 01905 } 01906 01907 /* execute instructions, until pc is set to -1 */ 01908 while (pc != -1) { 01909 union { 01910 struct tgsi_full_instruction inst; 01911 qword buffer[ROUNDUP16(sizeof(struct tgsi_full_instruction)) / 16]; 01912 } i ALIGN16_ATTRIB; 01913 unsigned ea = (unsigned) (mach->Instructions + pc); 01914 01915 spu_dcache_fetch_unaligned(i.buffer, ea, sizeof(i.inst)); 01916 exec_instruction( mach, & i.inst, &pc ); 01917 } 01918 01919 #if 0 01920 /* we scale from floats in [0,1] to Zbuffer ints in sp_quad_depth_test.c */ 01921 if (mach->Processor == TGSI_PROCESSOR_FRAGMENT) { 01922 /* 01923 * Scale back depth component. 
01924 */ 01925 for (i = 0; i < 4; i++) 01926 mach->Outputs[0].xyzw[2].f[i] *= ctx->DrawBuffer->_DepthMaxF; 01927 } 01928 #endif 01929 01930 return ~mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0]; 01931 }
static void store_dest | ( | struct spu_exec_machine * | mach, | |
const union spu_exec_channel * | chan, | |||
const struct tgsi_full_dst_register * | reg, | |||
const struct tgsi_full_instruction * | inst, | |||
uint | chan_index | |||
) | [static] |
Definition at line 534 of file spu_exec.c.
References spu_exec_machine::Addrs, ASSERT, tgsi_full_dst_register::DstRegister, spu_exec_machine::ExecMask, tgsi_dst_register::File, spu_exec_channel::i, tgsi_dst_register::Index, tgsi_full_instruction::Instruction, micro_max(), micro_min(), spu_exec_machine::Outputs, spu_exec_channel::q, tgsi_instruction::Saturate, TEMP_0_C, TEMP_0_I, TEMP_1_C, TEMP_1_I, TEMP_OUTPUT_C, TEMP_OUTPUT_I, TGSI_FILE_ADDRESS, TGSI_FILE_NULL, TGSI_FILE_OUTPUT, TGSI_FILE_TEMPORARY, TGSI_SAT_MINUS_PLUS_ONE, TGSI_SAT_NONE, TGSI_SAT_ZERO_ONE, and spu_exec_vector::xyzw.
00540 { 00541 union spu_exec_channel *dst; 00542 00543 switch( reg->DstRegister.File ) { 00544 case TGSI_FILE_NULL: 00545 return; 00546 00547 case TGSI_FILE_OUTPUT: 00548 dst = &mach->Outputs[mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] 00549 + reg->DstRegister.Index].xyzw[chan_index]; 00550 break; 00551 00552 case TGSI_FILE_TEMPORARY: 00553 dst = &mach->Temps[reg->DstRegister.Index].xyzw[chan_index]; 00554 break; 00555 00556 case TGSI_FILE_ADDRESS: 00557 dst = &mach->Addrs[reg->DstRegister.Index].xyzw[chan_index]; 00558 break; 00559 00560 default: 00561 ASSERT( 0 ); 00562 return; 00563 } 00564 00565 switch (inst->Instruction.Saturate) 00566 { 00567 case TGSI_SAT_NONE: 00568 if (mach->ExecMask & 0x1) 00569 dst->i[0] = chan->i[0]; 00570 if (mach->ExecMask & 0x2) 00571 dst->i[1] = chan->i[1]; 00572 if (mach->ExecMask & 0x4) 00573 dst->i[2] = chan->i[2]; 00574 if (mach->ExecMask & 0x8) 00575 dst->i[3] = chan->i[3]; 00576 break; 00577 00578 case TGSI_SAT_ZERO_ONE: 00579 /* XXX need to obey ExecMask here */ 00580 dst->q = micro_max(chan->q, mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].q); 00581 dst->q = micro_min(dst->q, mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].q); 00582 break; 00583 00584 case TGSI_SAT_MINUS_PLUS_ONE: 00585 ASSERT( 0 ); 00586 break; 00587 00588 default: 00589 ASSERT( 0 ); 00590 } 00591 }
const qword bl_shuf [static] |
Initial value:
{ 2 + 0, 2 + 1, 2 + 2, 2 + 3, 2 + 0, 2 + 1, 2 + 2, 2 + 3, 2 + 0, 2 + 1, 2 + 2, 2 + 3, 2 + 0, 2 + 1, 2 + 2, 2 + 3, }
Definition at line 198 of file spu_exec.c.
const qword br_shuf [static] |
Initial value:
{ 3 + 0, 3 + 1, 3 + 2, 3 + 3, 3 + 0, 3 + 1, 3 + 2, 3 + 3, 3 + 0, 3 + 1, 3 + 2, 3 + 3, 3 + 0, 3 + 1, 3 + 2, 3 + 3, }
Definition at line 187 of file spu_exec.c.
const qword tl_shuf [static] |
Initial value:
{ 0 + 0, 0 + 1, 0 + 2, 0 + 3, 0 + 0, 0 + 1, 0 + 2, 0 + 3, 0 + 0, 0 + 1, 0 + 2, 0 + 3, 0 + 0, 0 + 1, 0 + 2, 0 + 3, }
Definition at line 209 of file spu_exec.c.