tgsi_exec.c

Go to the documentation of this file.
00001 /**************************************************************************
00002  * 
00003  * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
00004  * All Rights Reserved.
00005  * 
00006  * Permission is hereby granted, free of charge, to any person obtaining a
00007  * copy of this software and associated documentation files (the
00008  * "Software"), to deal in the Software without restriction, including
00009  * without limitation the rights to use, copy, modify, merge, publish,
00010  * distribute, sub license, and/or sell copies of the Software, and to
00011  * permit persons to whom the Software is furnished to do so, subject to
00012  * the following conditions:
00013  * 
00014  * The above copyright notice and this permission notice (including the
00015  * next paragraph) shall be included in all copies or substantial portions
00016  * of the Software.
00017  * 
00018  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
00019  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
00020  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
00021  * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
00022  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
00023  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
00024  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
00025  * 
00026  **************************************************************************/
00027 
00053 #include "pipe/p_compiler.h"
00054 #include "pipe/p_state.h"
00055 #include "pipe/p_shader_tokens.h"
00056 #include "tgsi/tgsi_parse.h"
00057 #include "tgsi/tgsi_util.h"
00058 #include "tgsi_exec.h"
00059 #include "util/u_memory.h"
00060 #include "util/u_math.h"
00061 
00062 #define FAST_MATH 1
00063 
00064 #define TILE_TOP_LEFT     0
00065 #define TILE_TOP_RIGHT    1
00066 #define TILE_BOTTOM_LEFT  2
00067 #define TILE_BOTTOM_RIGHT 3
00068 
00069 #define CHAN_X  0
00070 #define CHAN_Y  1
00071 #define CHAN_Z  2
00072 #define CHAN_W  3
00073 
00074 /*
00075  * Shorthand locations of various utility registers (_I = Index, _C = Channel)
00076  */
00077 #define TEMP_0_I           TGSI_EXEC_TEMP_00000000_I
00078 #define TEMP_0_C           TGSI_EXEC_TEMP_00000000_C
00079 #define TEMP_7F_I          TGSI_EXEC_TEMP_7FFFFFFF_I
00080 #define TEMP_7F_C          TGSI_EXEC_TEMP_7FFFFFFF_C
00081 #define TEMP_80_I          TGSI_EXEC_TEMP_80000000_I
00082 #define TEMP_80_C          TGSI_EXEC_TEMP_80000000_C
00083 #define TEMP_FF_I          TGSI_EXEC_TEMP_FFFFFFFF_I
00084 #define TEMP_FF_C          TGSI_EXEC_TEMP_FFFFFFFF_C
00085 #define TEMP_1_I           TGSI_EXEC_TEMP_ONE_I
00086 #define TEMP_1_C           TGSI_EXEC_TEMP_ONE_C
00087 #define TEMP_2_I           TGSI_EXEC_TEMP_TWO_I
00088 #define TEMP_2_C           TGSI_EXEC_TEMP_TWO_C
00089 #define TEMP_128_I         TGSI_EXEC_TEMP_128_I
00090 #define TEMP_128_C         TGSI_EXEC_TEMP_128_C
00091 #define TEMP_M128_I        TGSI_EXEC_TEMP_MINUS_128_I
00092 #define TEMP_M128_C        TGSI_EXEC_TEMP_MINUS_128_C
00093 #define TEMP_KILMASK_I     TGSI_EXEC_TEMP_KILMASK_I
00094 #define TEMP_KILMASK_C     TGSI_EXEC_TEMP_KILMASK_C
00095 #define TEMP_OUTPUT_I      TGSI_EXEC_TEMP_OUTPUT_I
00096 #define TEMP_OUTPUT_C      TGSI_EXEC_TEMP_OUTPUT_C
00097 #define TEMP_PRIMITIVE_I   TGSI_EXEC_TEMP_PRIMITIVE_I
00098 #define TEMP_PRIMITIVE_C   TGSI_EXEC_TEMP_PRIMITIVE_C
00099 #define TEMP_CC_I          TGSI_EXEC_TEMP_CC_I
00100 #define TEMP_CC_C          TGSI_EXEC_TEMP_CC_C
00101 #define TEMP_3_I           TGSI_EXEC_TEMP_THREE_I
00102 #define TEMP_3_C           TGSI_EXEC_TEMP_THREE_C
00103 #define TEMP_HALF_I        TGSI_EXEC_TEMP_HALF_I
00104 #define TEMP_HALF_C        TGSI_EXEC_TEMP_HALF_C
00105 #define TEMP_R0            TGSI_EXEC_TEMP_R0
00106 
/*
 * Non-zero when channel CHAN is set in the write mask of the instruction's
 * first destination register.
 */
#define IS_CHANNEL_ENABLED(INST, CHAN)\
   ((INST).FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN)))

/* Same test against the second destination register. */
#define IS_CHANNEL_ENABLED2(INST, CHAN)\
   ((INST).FullDstRegisters[1].DstRegister.WriteMask & (1 << (CHAN)))

/*
 * Loop header: runs the statement that follows once for every channel
 * enabled in the first destination's write mask.  CHAN must be an assignable
 * integer variable.
 *
 * The filter is spelled "if (!enabled) {} else" so that the hidden `if` is
 * already complete; an `else` written after the loop body by the caller can
 * therefore never attach to it by accident.
 */
#define FOR_EACH_ENABLED_CHANNEL(INST, CHAN)\
   for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)\
      if (!IS_CHANNEL_ENABLED( INST, CHAN )) {} else

/* As FOR_EACH_ENABLED_CHANNEL, but for the second destination register. */
#define FOR_EACH_ENABLED_CHANNEL2(INST, CHAN)\
   for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)\
      if (!IS_CHANNEL_ENABLED2( INST, CHAN )) {} else
00120 
00121 
/*
 * Recompute MACH->ExecMask as the AND of the condition, loop, continue and
 * function masks.  The argument and the whole expansion are parenthesized so
 * that any pointer expression can be passed and the macro can be used inside
 * a larger expression.
 */
#define UPDATE_EXEC_MASK(MACH) \
      ((MACH)->ExecMask = (MACH)->CondMask & (MACH)->LoopMask & \
                          (MACH)->ContMask & (MACH)->FuncMask)
00125 
00131 void 
00132 tgsi_exec_machine_bind_shader(
00133    struct tgsi_exec_machine *mach,
00134    const struct tgsi_token *tokens,
00135    uint numSamplers,
00136    struct tgsi_sampler *samplers)
00137 {
00138    uint k;
00139    struct tgsi_parse_context parse;
00140    struct tgsi_exec_labels *labels = &mach->Labels;
00141    struct tgsi_full_instruction *instructions;
00142    struct tgsi_full_declaration *declarations;
00143    uint maxInstructions = 10, numInstructions = 0;
00144    uint maxDeclarations = 10, numDeclarations = 0;
00145    uint instno = 0;
00146 
00147 #if 0
00148    tgsi_dump(tokens, 0);
00149 #endif
00150 
00151    util_init_math();
00152 
00153    mach->Tokens = tokens;
00154    mach->Samplers = samplers;
00155 
00156    k = tgsi_parse_init (&parse, mach->Tokens);
00157    if (k != TGSI_PARSE_OK) {
00158       debug_printf( "Problem parsing!\n" );
00159       return;
00160    }
00161 
00162    mach->Processor = parse.FullHeader.Processor.Processor;
00163    mach->ImmLimit = 0;
00164    labels->count = 0;
00165 
00166    declarations = (struct tgsi_full_declaration *)
00167       MALLOC( maxDeclarations * sizeof(struct tgsi_full_declaration) );
00168 
00169    if (!declarations) {
00170       return;
00171    }
00172 
00173    instructions = (struct tgsi_full_instruction *)
00174       MALLOC( maxInstructions * sizeof(struct tgsi_full_instruction) );
00175 
00176    if (!instructions) {
00177       FREE( declarations );
00178       return;
00179    }
00180 
00181    while( !tgsi_parse_end_of_tokens( &parse ) ) {
00182       uint pointer = parse.Position;
00183       uint i;
00184 
00185       tgsi_parse_token( &parse );
00186       switch( parse.FullToken.Token.Type ) {
00187       case TGSI_TOKEN_TYPE_DECLARATION:
00188          /* save expanded declaration */
00189          if (numDeclarations == maxDeclarations) {
00190             declarations = REALLOC(declarations,
00191                                    maxDeclarations
00192                                    * sizeof(struct tgsi_full_declaration),
00193                                    (maxDeclarations + 10)
00194                                    * sizeof(struct tgsi_full_declaration));
00195             maxDeclarations += 10;
00196          }
00197          memcpy(declarations + numDeclarations,
00198                 &parse.FullToken.FullDeclaration,
00199                 sizeof(declarations[0]));
00200          numDeclarations++;
00201          break;
00202 
00203       case TGSI_TOKEN_TYPE_IMMEDIATE:
00204          {
00205             uint size = parse.FullToken.FullImmediate.Immediate.Size - 1;
00206             assert( size % 4 == 0 );
00207             assert( mach->ImmLimit + size / 4 <= TGSI_EXEC_NUM_IMMEDIATES );
00208 
00209             for( i = 0; i < size; i++ ) {
00210                mach->Imms[mach->ImmLimit + i / 4][i % 4] = 
00211                   parse.FullToken.FullImmediate.u.ImmediateFloat32[i].Float;
00212             }
00213             mach->ImmLimit += size / 4;
00214          }
00215          break;
00216 
00217       case TGSI_TOKEN_TYPE_INSTRUCTION:
00218          assert( labels->count < MAX_LABELS );
00219 
00220          labels->labels[labels->count][0] = instno;
00221          labels->labels[labels->count][1] = pointer;
00222          labels->count++;
00223 
00224          /* save expanded instruction */
00225          if (numInstructions == maxInstructions) {
00226             instructions = REALLOC(instructions,
00227                                    maxInstructions
00228                                    * sizeof(struct tgsi_full_instruction),
00229                                    (maxInstructions + 10)
00230                                    * sizeof(struct tgsi_full_instruction));
00231             maxInstructions += 10;
00232          }
00233          memcpy(instructions + numInstructions,
00234                 &parse.FullToken.FullInstruction,
00235                 sizeof(instructions[0]));
00236          numInstructions++;
00237          break;
00238 
00239       default:
00240          assert( 0 );
00241       }
00242    }
00243    tgsi_parse_free (&parse);
00244 
00245    if (mach->Declarations) {
00246       FREE( mach->Declarations );
00247    }
00248    mach->Declarations = declarations;
00249    mach->NumDeclarations = numDeclarations;
00250 
00251    if (mach->Instructions) {
00252       FREE( mach->Instructions );
00253    }
00254    mach->Instructions = instructions;
00255    mach->NumInstructions = numInstructions;
00256 }
00257 
00258 
00259 void
00260 tgsi_exec_machine_init(
00261    struct tgsi_exec_machine *mach )
00262 {
00263    uint i;
00264 
00265    mach->Temps = (struct tgsi_exec_vector *) tgsi_align_128bit( mach->_Temps);
00266    mach->Addrs = &mach->Temps[TGSI_EXEC_TEMP_ADDR];
00267 
00268    /* Setup constants. */
00269    for( i = 0; i < 4; i++ ) {
00270       mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].u[i] = 0x00000000;
00271       mach->Temps[TEMP_7F_I].xyzw[TEMP_7F_C].u[i] = 0x7FFFFFFF;
00272       mach->Temps[TEMP_80_I].xyzw[TEMP_80_C].u[i] = 0x80000000;
00273       mach->Temps[TEMP_FF_I].xyzw[TEMP_FF_C].u[i] = 0xFFFFFFFF;
00274       mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].f[i] = 1.0f;
00275       mach->Temps[TEMP_2_I].xyzw[TEMP_2_C].f[i] = 2.0f;
00276       mach->Temps[TEMP_128_I].xyzw[TEMP_128_C].f[i] = 128.0f;
00277       mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C].f[i] = -128.0f;
00278       mach->Temps[TEMP_3_I].xyzw[TEMP_3_C].f[i] = 3.0f;
00279       mach->Temps[TEMP_HALF_I].xyzw[TEMP_HALF_C].f[i] = 0.5f;
00280    }
00281 }
00282 
00283 
00284 void
00285 tgsi_exec_machine_free_data(struct tgsi_exec_machine *mach)
00286 {
00287    if (mach->Instructions) {
00288       FREE(mach->Instructions);
00289       mach->Instructions = NULL;
00290       mach->NumInstructions = 0;
00291    }
00292    if (mach->Declarations) {
00293       FREE(mach->Declarations);
00294       mach->Declarations = NULL;
00295       mach->NumDeclarations = 0;
00296    }
00297 }
00298 
00299 
00300 static void
00301 micro_abs(
00302    union tgsi_exec_channel *dst,
00303    const union tgsi_exec_channel *src )
00304 {
00305    dst->f[0] = fabsf( src->f[0] );
00306    dst->f[1] = fabsf( src->f[1] );
00307    dst->f[2] = fabsf( src->f[2] );
00308    dst->f[3] = fabsf( src->f[3] );
00309 }
00310 
00311 static void
00312 micro_add(
00313    union tgsi_exec_channel *dst,
00314    const union tgsi_exec_channel *src0,
00315    const union tgsi_exec_channel *src1 )
00316 {
00317    dst->f[0] = src0->f[0] + src1->f[0];
00318    dst->f[1] = src0->f[1] + src1->f[1];
00319    dst->f[2] = src0->f[2] + src1->f[2];
00320    dst->f[3] = src0->f[3] + src1->f[3];
00321 }
00322 
00323 static void
00324 micro_iadd(
00325    union tgsi_exec_channel *dst,
00326    const union tgsi_exec_channel *src0,
00327    const union tgsi_exec_channel *src1 )
00328 {
00329    dst->i[0] = src0->i[0] + src1->i[0];
00330    dst->i[1] = src0->i[1] + src1->i[1];
00331    dst->i[2] = src0->i[2] + src1->i[2];
00332    dst->i[3] = src0->i[3] + src1->i[3];
00333 }
00334 
00335 static void
00336 micro_and(
00337    union tgsi_exec_channel *dst,
00338    const union tgsi_exec_channel *src0,
00339    const union tgsi_exec_channel *src1 )
00340 {
00341    dst->u[0] = src0->u[0] & src1->u[0];
00342    dst->u[1] = src0->u[1] & src1->u[1];
00343    dst->u[2] = src0->u[2] & src1->u[2];
00344    dst->u[3] = src0->u[3] & src1->u[3];
00345 }
00346 
00347 static void
00348 micro_ceil(
00349    union tgsi_exec_channel *dst,
00350    const union tgsi_exec_channel *src )
00351 {
00352    dst->f[0] = ceilf( src->f[0] );
00353    dst->f[1] = ceilf( src->f[1] );
00354    dst->f[2] = ceilf( src->f[2] );
00355    dst->f[3] = ceilf( src->f[3] );
00356 }
00357 
00358 static void
00359 micro_cos(
00360    union tgsi_exec_channel *dst,
00361    const union tgsi_exec_channel *src )
00362 {
00363    dst->f[0] = cosf( src->f[0] );
00364    dst->f[1] = cosf( src->f[1] );
00365    dst->f[2] = cosf( src->f[2] );
00366    dst->f[3] = cosf( src->f[3] );
00367 }
00368 
00369 static void
00370 micro_ddx(
00371    union tgsi_exec_channel *dst,
00372    const union tgsi_exec_channel *src )
00373 {
00374    dst->f[0] =
00375    dst->f[1] =
00376    dst->f[2] =
00377    dst->f[3] = src->f[TILE_BOTTOM_RIGHT] - src->f[TILE_BOTTOM_LEFT];
00378 }
00379 
00380 static void
00381 micro_ddy(
00382    union tgsi_exec_channel *dst,
00383    const union tgsi_exec_channel *src )
00384 {
00385    dst->f[0] =
00386    dst->f[1] =
00387    dst->f[2] =
00388    dst->f[3] = src->f[TILE_TOP_LEFT] - src->f[TILE_BOTTOM_LEFT];
00389 }
00390 
00391 static void
00392 micro_div(
00393    union tgsi_exec_channel *dst,
00394    const union tgsi_exec_channel *src0,
00395    const union tgsi_exec_channel *src1 )
00396 {
00397    if (src1->f[0] != 0) {
00398       dst->f[0] = src0->f[0] / src1->f[0];
00399    }
00400    if (src1->f[1] != 0) {
00401       dst->f[1] = src0->f[1] / src1->f[1];
00402    }
00403    if (src1->f[2] != 0) {
00404       dst->f[2] = src0->f[2] / src1->f[2];
00405    }
00406    if (src1->f[3] != 0) {
00407       dst->f[3] = src0->f[3] / src1->f[3];
00408    }
00409 }
00410 
00411 static void
00412 micro_udiv(
00413    union tgsi_exec_channel *dst,
00414    const union tgsi_exec_channel *src0,
00415    const union tgsi_exec_channel *src1 )
00416 {
00417    dst->u[0] = src0->u[0] / src1->u[0];
00418    dst->u[1] = src0->u[1] / src1->u[1];
00419    dst->u[2] = src0->u[2] / src1->u[2];
00420    dst->u[3] = src0->u[3] / src1->u[3];
00421 }
00422 
00423 static void
00424 micro_eq(
00425    union tgsi_exec_channel *dst,
00426    const union tgsi_exec_channel *src0,
00427    const union tgsi_exec_channel *src1,
00428    const union tgsi_exec_channel *src2,
00429    const union tgsi_exec_channel *src3 )
00430 {
00431    dst->f[0] = src0->f[0] == src1->f[0] ? src2->f[0] : src3->f[0];
00432    dst->f[1] = src0->f[1] == src1->f[1] ? src2->f[1] : src3->f[1];
00433    dst->f[2] = src0->f[2] == src1->f[2] ? src2->f[2] : src3->f[2];
00434    dst->f[3] = src0->f[3] == src1->f[3] ? src2->f[3] : src3->f[3];
00435 }
00436 
00437 static void
00438 micro_ieq(
00439    union tgsi_exec_channel *dst,
00440    const union tgsi_exec_channel *src0,
00441    const union tgsi_exec_channel *src1,
00442    const union tgsi_exec_channel *src2,
00443    const union tgsi_exec_channel *src3 )
00444 {
00445    dst->i[0] = src0->i[0] == src1->i[0] ? src2->i[0] : src3->i[0];
00446    dst->i[1] = src0->i[1] == src1->i[1] ? src2->i[1] : src3->i[1];
00447    dst->i[2] = src0->i[2] == src1->i[2] ? src2->i[2] : src3->i[2];
00448    dst->i[3] = src0->i[3] == src1->i[3] ? src2->i[3] : src3->i[3];
00449 }
00450 
00451 static void
00452 micro_exp2(
00453    union tgsi_exec_channel *dst,
00454    const union tgsi_exec_channel *src)
00455 {
00456 #if FAST_MATH
00457    dst->f[0] = util_fast_exp2( src->f[0] );
00458    dst->f[1] = util_fast_exp2( src->f[1] );
00459    dst->f[2] = util_fast_exp2( src->f[2] );
00460    dst->f[3] = util_fast_exp2( src->f[3] );
00461 #else
00462    dst->f[0] = powf( 2.0f, src->f[0] );
00463    dst->f[1] = powf( 2.0f, src->f[1] );
00464    dst->f[2] = powf( 2.0f, src->f[2] );
00465    dst->f[3] = powf( 2.0f, src->f[3] );
00466 #endif
00467 }
00468 
00469 static void
00470 micro_f2ut(
00471    union tgsi_exec_channel *dst,
00472    const union tgsi_exec_channel *src )
00473 {
00474    dst->u[0] = (uint) src->f[0];
00475    dst->u[1] = (uint) src->f[1];
00476    dst->u[2] = (uint) src->f[2];
00477    dst->u[3] = (uint) src->f[3];
00478 }
00479 
00480 static void
00481 micro_float_clamp(union tgsi_exec_channel *dst,
00482                   const union tgsi_exec_channel *src)
00483 {
00484    uint i;
00485 
00486    for (i = 0; i < 4; i++) {
00487       if (src->f[i] > 0.0f) {
00488          if (src->f[i] > 1.884467e+019f)
00489             dst->f[i] = 1.884467e+019f;
00490          else if (src->f[i] < 5.42101e-020f)
00491             dst->f[i] = 5.42101e-020f;
00492          else
00493             dst->f[i] = src->f[i];
00494       }
00495       else {
00496          if (src->f[i] < -1.884467e+019f)
00497             dst->f[i] = -1.884467e+019f;
00498          else if (src->f[i] > -5.42101e-020f)
00499             dst->f[i] = -5.42101e-020f;
00500          else
00501             dst->f[i] = src->f[i];
00502       }
00503    }
00504 }
00505 
00506 static void
00507 micro_flr(
00508    union tgsi_exec_channel *dst,
00509    const union tgsi_exec_channel *src )
00510 {
00511    dst->f[0] = floorf( src->f[0] );
00512    dst->f[1] = floorf( src->f[1] );
00513    dst->f[2] = floorf( src->f[2] );
00514    dst->f[3] = floorf( src->f[3] );
00515 }
00516 
00517 static void
00518 micro_frc(
00519    union tgsi_exec_channel *dst,
00520    const union tgsi_exec_channel *src )
00521 {
00522    dst->f[0] = src->f[0] - floorf( src->f[0] );
00523    dst->f[1] = src->f[1] - floorf( src->f[1] );
00524    dst->f[2] = src->f[2] - floorf( src->f[2] );
00525    dst->f[3] = src->f[3] - floorf( src->f[3] );
00526 }
00527 
00528 static void
00529 micro_i2f(
00530    union tgsi_exec_channel *dst,
00531    const union tgsi_exec_channel *src )
00532 {
00533    dst->f[0] = (float) src->i[0];
00534    dst->f[1] = (float) src->i[1];
00535    dst->f[2] = (float) src->i[2];
00536    dst->f[3] = (float) src->i[3];
00537 }
00538 
00539 static void
00540 micro_lg2(
00541    union tgsi_exec_channel *dst,
00542    const union tgsi_exec_channel *src )
00543 {
00544 #if FAST_MATH
00545    dst->f[0] = util_fast_log2( src->f[0] );
00546    dst->f[1] = util_fast_log2( src->f[1] );
00547    dst->f[2] = util_fast_log2( src->f[2] );
00548    dst->f[3] = util_fast_log2( src->f[3] );
00549 #else
00550    dst->f[0] = logf( src->f[0] ) * 1.442695f;
00551    dst->f[1] = logf( src->f[1] ) * 1.442695f;
00552    dst->f[2] = logf( src->f[2] ) * 1.442695f;
00553    dst->f[3] = logf( src->f[3] ) * 1.442695f;
00554 #endif
00555 }
00556 
00557 static void
00558 micro_le(
00559    union tgsi_exec_channel *dst,
00560    const union tgsi_exec_channel *src0,
00561    const union tgsi_exec_channel *src1,
00562    const union tgsi_exec_channel *src2,
00563    const union tgsi_exec_channel *src3 )
00564 {
00565    dst->f[0] = src0->f[0] <= src1->f[0] ? src2->f[0] : src3->f[0];
00566    dst->f[1] = src0->f[1] <= src1->f[1] ? src2->f[1] : src3->f[1];
00567    dst->f[2] = src0->f[2] <= src1->f[2] ? src2->f[2] : src3->f[2];
00568    dst->f[3] = src0->f[3] <= src1->f[3] ? src2->f[3] : src3->f[3];
00569 }
00570 
00571 static void
00572 micro_lt(
00573    union tgsi_exec_channel *dst,
00574    const union tgsi_exec_channel *src0,
00575    const union tgsi_exec_channel *src1,
00576    const union tgsi_exec_channel *src2,
00577    const union tgsi_exec_channel *src3 )
00578 {
00579    dst->f[0] = src0->f[0] < src1->f[0] ? src2->f[0] : src3->f[0];
00580    dst->f[1] = src0->f[1] < src1->f[1] ? src2->f[1] : src3->f[1];
00581    dst->f[2] = src0->f[2] < src1->f[2] ? src2->f[2] : src3->f[2];
00582    dst->f[3] = src0->f[3] < src1->f[3] ? src2->f[3] : src3->f[3];
00583 }
00584 
00585 static void
00586 micro_ilt(
00587    union tgsi_exec_channel *dst,
00588    const union tgsi_exec_channel *src0,
00589    const union tgsi_exec_channel *src1,
00590    const union tgsi_exec_channel *src2,
00591    const union tgsi_exec_channel *src3 )
00592 {
00593    dst->i[0] = src0->i[0] < src1->i[0] ? src2->i[0] : src3->i[0];
00594    dst->i[1] = src0->i[1] < src1->i[1] ? src2->i[1] : src3->i[1];
00595    dst->i[2] = src0->i[2] < src1->i[2] ? src2->i[2] : src3->i[2];
00596    dst->i[3] = src0->i[3] < src1->i[3] ? src2->i[3] : src3->i[3];
00597 }
00598 
00599 static void
00600 micro_ult(
00601    union tgsi_exec_channel *dst,
00602    const union tgsi_exec_channel *src0,
00603    const union tgsi_exec_channel *src1,
00604    const union tgsi_exec_channel *src2,
00605    const union tgsi_exec_channel *src3 )
00606 {
00607    dst->u[0] = src0->u[0] < src1->u[0] ? src2->u[0] : src3->u[0];
00608    dst->u[1] = src0->u[1] < src1->u[1] ? src2->u[1] : src3->u[1];
00609    dst->u[2] = src0->u[2] < src1->u[2] ? src2->u[2] : src3->u[2];
00610    dst->u[3] = src0->u[3] < src1->u[3] ? src2->u[3] : src3->u[3];
00611 }
00612 
00613 static void
00614 micro_max(
00615    union tgsi_exec_channel *dst,
00616    const union tgsi_exec_channel *src0,
00617    const union tgsi_exec_channel *src1 )
00618 {
00619    dst->f[0] = src0->f[0] > src1->f[0] ? src0->f[0] : src1->f[0];
00620    dst->f[1] = src0->f[1] > src1->f[1] ? src0->f[1] : src1->f[1];
00621    dst->f[2] = src0->f[2] > src1->f[2] ? src0->f[2] : src1->f[2];
00622    dst->f[3] = src0->f[3] > src1->f[3] ? src0->f[3] : src1->f[3];
00623 }
00624 
00625 static void
00626 micro_imax(
00627    union tgsi_exec_channel *dst,
00628    const union tgsi_exec_channel *src0,
00629    const union tgsi_exec_channel *src1 )
00630 {
00631    dst->i[0] = src0->i[0] > src1->i[0] ? src0->i[0] : src1->i[0];
00632    dst->i[1] = src0->i[1] > src1->i[1] ? src0->i[1] : src1->i[1];
00633    dst->i[2] = src0->i[2] > src1->i[2] ? src0->i[2] : src1->i[2];
00634    dst->i[3] = src0->i[3] > src1->i[3] ? src0->i[3] : src1->i[3];
00635 }
00636 
00637 static void
00638 micro_umax(
00639    union tgsi_exec_channel *dst,
00640    const union tgsi_exec_channel *src0,
00641    const union tgsi_exec_channel *src1 )
00642 {
00643    dst->u[0] = src0->u[0] > src1->u[0] ? src0->u[0] : src1->u[0];
00644    dst->u[1] = src0->u[1] > src1->u[1] ? src0->u[1] : src1->u[1];
00645    dst->u[2] = src0->u[2] > src1->u[2] ? src0->u[2] : src1->u[2];
00646    dst->u[3] = src0->u[3] > src1->u[3] ? src0->u[3] : src1->u[3];
00647 }
00648 
00649 static void
00650 micro_min(
00651    union tgsi_exec_channel *dst,
00652    const union tgsi_exec_channel *src0,
00653    const union tgsi_exec_channel *src1 )
00654 {
00655    dst->f[0] = src0->f[0] < src1->f[0] ? src0->f[0] : src1->f[0];
00656    dst->f[1] = src0->f[1] < src1->f[1] ? src0->f[1] : src1->f[1];
00657    dst->f[2] = src0->f[2] < src1->f[2] ? src0->f[2] : src1->f[2];
00658    dst->f[3] = src0->f[3] < src1->f[3] ? src0->f[3] : src1->f[3];
00659 }
00660 
00661 static void
00662 micro_imin(
00663    union tgsi_exec_channel *dst,
00664    const union tgsi_exec_channel *src0,
00665    const union tgsi_exec_channel *src1 )
00666 {
00667    dst->i[0] = src0->i[0] < src1->i[0] ? src0->i[0] : src1->i[0];
00668    dst->i[1] = src0->i[1] < src1->i[1] ? src0->i[1] : src1->i[1];
00669    dst->i[2] = src0->i[2] < src1->i[2] ? src0->i[2] : src1->i[2];
00670    dst->i[3] = src0->i[3] < src1->i[3] ? src0->i[3] : src1->i[3];
00671 }
00672 
00673 static void
00674 micro_umin(
00675    union tgsi_exec_channel *dst,
00676    const union tgsi_exec_channel *src0,
00677    const union tgsi_exec_channel *src1 )
00678 {
00679    dst->u[0] = src0->u[0] < src1->u[0] ? src0->u[0] : src1->u[0];
00680    dst->u[1] = src0->u[1] < src1->u[1] ? src0->u[1] : src1->u[1];
00681    dst->u[2] = src0->u[2] < src1->u[2] ? src0->u[2] : src1->u[2];
00682    dst->u[3] = src0->u[3] < src1->u[3] ? src0->u[3] : src1->u[3];
00683 }
00684 
00685 static void
00686 micro_umod(
00687    union tgsi_exec_channel *dst,
00688    const union tgsi_exec_channel *src0,
00689    const union tgsi_exec_channel *src1 )
00690 {
00691    dst->u[0] = src0->u[0] % src1->u[0];
00692    dst->u[1] = src0->u[1] % src1->u[1];
00693    dst->u[2] = src0->u[2] % src1->u[2];
00694    dst->u[3] = src0->u[3] % src1->u[3];
00695 }
00696 
00697 static void
00698 micro_mul(
00699    union tgsi_exec_channel *dst,
00700    const union tgsi_exec_channel *src0,
00701    const union tgsi_exec_channel *src1 )
00702 {
00703    dst->f[0] = src0->f[0] * src1->f[0];
00704    dst->f[1] = src0->f[1] * src1->f[1];
00705    dst->f[2] = src0->f[2] * src1->f[2];
00706    dst->f[3] = src0->f[3] * src1->f[3];
00707 }
00708 
00709 static void
00710 micro_imul(
00711    union tgsi_exec_channel *dst,
00712    const union tgsi_exec_channel *src0,
00713    const union tgsi_exec_channel *src1 )
00714 {
00715    dst->i[0] = src0->i[0] * src1->i[0];
00716    dst->i[1] = src0->i[1] * src1->i[1];
00717    dst->i[2] = src0->i[2] * src1->i[2];
00718    dst->i[3] = src0->i[3] * src1->i[3];
00719 }
00720 
00721 static void
00722 micro_imul64(
00723    union tgsi_exec_channel *dst0,
00724    union tgsi_exec_channel *dst1,
00725    const union tgsi_exec_channel *src0,
00726    const union tgsi_exec_channel *src1 )
00727 {
00728    dst1->i[0] = src0->i[0] * src1->i[0];
00729    dst1->i[1] = src0->i[1] * src1->i[1];
00730    dst1->i[2] = src0->i[2] * src1->i[2];
00731    dst1->i[3] = src0->i[3] * src1->i[3];
00732    dst0->i[0] = 0;
00733    dst0->i[1] = 0;
00734    dst0->i[2] = 0;
00735    dst0->i[3] = 0;
00736 }
00737 
00738 static void
00739 micro_umul64(
00740    union tgsi_exec_channel *dst0,
00741    union tgsi_exec_channel *dst1,
00742    const union tgsi_exec_channel *src0,
00743    const union tgsi_exec_channel *src1 )
00744 {
00745    dst1->u[0] = src0->u[0] * src1->u[0];
00746    dst1->u[1] = src0->u[1] * src1->u[1];
00747    dst1->u[2] = src0->u[2] * src1->u[2];
00748    dst1->u[3] = src0->u[3] * src1->u[3];
00749    dst0->u[0] = 0;
00750    dst0->u[1] = 0;
00751    dst0->u[2] = 0;
00752    dst0->u[3] = 0;
00753 }
00754 
00755 static void
00756 micro_movc(
00757    union tgsi_exec_channel *dst,
00758    const union tgsi_exec_channel *src0,
00759    const union tgsi_exec_channel *src1,
00760    const union tgsi_exec_channel *src2 )
00761 {
00762    dst->u[0] = src0->u[0] ? src1->u[0] : src2->u[0];
00763    dst->u[1] = src0->u[1] ? src1->u[1] : src2->u[1];
00764    dst->u[2] = src0->u[2] ? src1->u[2] : src2->u[2];
00765    dst->u[3] = src0->u[3] ? src1->u[3] : src2->u[3];
00766 }
00767 
00768 static void
00769 micro_neg(
00770    union tgsi_exec_channel *dst,
00771    const union tgsi_exec_channel *src )
00772 {
00773    dst->f[0] = -src->f[0];
00774    dst->f[1] = -src->f[1];
00775    dst->f[2] = -src->f[2];
00776    dst->f[3] = -src->f[3];
00777 }
00778 
00779 static void
00780 micro_ineg(
00781    union tgsi_exec_channel *dst,
00782    const union tgsi_exec_channel *src )
00783 {
00784    dst->i[0] = -src->i[0];
00785    dst->i[1] = -src->i[1];
00786    dst->i[2] = -src->i[2];
00787    dst->i[3] = -src->i[3];
00788 }
00789 
00790 static void
00791 micro_not(
00792    union tgsi_exec_channel *dst,
00793    const union tgsi_exec_channel *src )
00794 {
00795    dst->u[0] = ~src->u[0];
00796    dst->u[1] = ~src->u[1];
00797    dst->u[2] = ~src->u[2];
00798    dst->u[3] = ~src->u[3];
00799 }
00800 
00801 static void
00802 micro_or(
00803    union tgsi_exec_channel *dst,
00804    const union tgsi_exec_channel *src0,
00805    const union tgsi_exec_channel *src1 )
00806 {
00807    dst->u[0] = src0->u[0] | src1->u[0];
00808    dst->u[1] = src0->u[1] | src1->u[1];
00809    dst->u[2] = src0->u[2] | src1->u[2];
00810    dst->u[3] = src0->u[3] | src1->u[3];
00811 }
00812 
00813 static void
00814 micro_pow(
00815    union tgsi_exec_channel *dst,
00816    const union tgsi_exec_channel *src0,
00817    const union tgsi_exec_channel *src1 )
00818 {
00819 #if FAST_MATH
00820    dst->f[0] = util_fast_pow( src0->f[0], src1->f[0] );
00821    dst->f[1] = util_fast_pow( src0->f[1], src1->f[1] );
00822    dst->f[2] = util_fast_pow( src0->f[2], src1->f[2] );
00823    dst->f[3] = util_fast_pow( src0->f[3], src1->f[3] );
00824 #else
00825    dst->f[0] = powf( src0->f[0], src1->f[0] );
00826    dst->f[1] = powf( src0->f[1], src1->f[1] );
00827    dst->f[2] = powf( src0->f[2], src1->f[2] );
00828    dst->f[3] = powf( src0->f[3], src1->f[3] );
00829 #endif
00830 }
00831 
00832 static void
00833 micro_rnd(
00834    union tgsi_exec_channel *dst,
00835    const union tgsi_exec_channel *src )
00836 {
00837    dst->f[0] = floorf( src->f[0] + 0.5f );
00838    dst->f[1] = floorf( src->f[1] + 0.5f );
00839    dst->f[2] = floorf( src->f[2] + 0.5f );
00840    dst->f[3] = floorf( src->f[3] + 0.5f );
00841 }
00842 
00843 static void
00844 micro_sgn(
00845    union tgsi_exec_channel *dst,
00846    const union tgsi_exec_channel *src )
00847 {
00848    dst->f[0] = src->f[0] < 0.0f ? -1.0f : src->f[0] > 0.0f ? 1.0f : 0.0f;
00849    dst->f[1] = src->f[1] < 0.0f ? -1.0f : src->f[1] > 0.0f ? 1.0f : 0.0f;
00850    dst->f[2] = src->f[2] < 0.0f ? -1.0f : src->f[2] > 0.0f ? 1.0f : 0.0f;
00851    dst->f[3] = src->f[3] < 0.0f ? -1.0f : src->f[3] > 0.0f ? 1.0f : 0.0f;
00852 }
00853 
00854 static void
00855 micro_shl(
00856    union tgsi_exec_channel *dst,
00857    const union tgsi_exec_channel *src0,
00858    const union tgsi_exec_channel *src1 )
00859 {
00860    dst->i[0] = src0->i[0] << src1->i[0];
00861    dst->i[1] = src0->i[1] << src1->i[1];
00862    dst->i[2] = src0->i[2] << src1->i[2];
00863    dst->i[3] = src0->i[3] << src1->i[3];
00864 }
00865 
00866 static void
00867 micro_ishr(
00868    union tgsi_exec_channel *dst,
00869    const union tgsi_exec_channel *src0,
00870    const union tgsi_exec_channel *src1 )
00871 {
00872    dst->i[0] = src0->i[0] >> src1->i[0];
00873    dst->i[1] = src0->i[1] >> src1->i[1];
00874    dst->i[2] = src0->i[2] >> src1->i[2];
00875    dst->i[3] = src0->i[3] >> src1->i[3];
00876 }
00877 
00878 static void
00879 micro_trunc(
00880    union tgsi_exec_channel *dst,
00881    const union tgsi_exec_channel *src0 )
00882 {
00883    dst->f[0] = (float) (int) src0->f[0];
00884    dst->f[1] = (float) (int) src0->f[1];
00885    dst->f[2] = (float) (int) src0->f[2];
00886    dst->f[3] = (float) (int) src0->f[3];
00887 }
00888 
00889 static void
00890 micro_ushr(
00891    union tgsi_exec_channel *dst,
00892    const union tgsi_exec_channel *src0,
00893    const union tgsi_exec_channel *src1 )
00894 {
00895    dst->u[0] = src0->u[0] >> src1->u[0];
00896    dst->u[1] = src0->u[1] >> src1->u[1];
00897    dst->u[2] = src0->u[2] >> src1->u[2];
00898    dst->u[3] = src0->u[3] >> src1->u[3];
00899 }
00900 
00901 static void
00902 micro_sin(
00903    union tgsi_exec_channel *dst,
00904    const union tgsi_exec_channel *src )
00905 {
00906    dst->f[0] = sinf( src->f[0] );
00907    dst->f[1] = sinf( src->f[1] );
00908    dst->f[2] = sinf( src->f[2] );
00909    dst->f[3] = sinf( src->f[3] );
00910 }
00911 
00912 static void
00913 micro_sqrt( union tgsi_exec_channel *dst,
00914             const union tgsi_exec_channel *src )
00915 {
00916    dst->f[0] = sqrtf( src->f[0] );
00917    dst->f[1] = sqrtf( src->f[1] );
00918    dst->f[2] = sqrtf( src->f[2] );
00919    dst->f[3] = sqrtf( src->f[3] );
00920 }
00921 
00922 static void
00923 micro_sub(
00924    union tgsi_exec_channel *dst,
00925    const union tgsi_exec_channel *src0,
00926    const union tgsi_exec_channel *src1 )
00927 {
00928    dst->f[0] = src0->f[0] - src1->f[0];
00929    dst->f[1] = src0->f[1] - src1->f[1];
00930    dst->f[2] = src0->f[2] - src1->f[2];
00931    dst->f[3] = src0->f[3] - src1->f[3];
00932 }
00933 
00934 static void
00935 micro_u2f(
00936    union tgsi_exec_channel *dst,
00937    const union tgsi_exec_channel *src )
00938 {
00939    dst->f[0] = (float) src->u[0];
00940    dst->f[1] = (float) src->u[1];
00941    dst->f[2] = (float) src->u[2];
00942    dst->f[3] = (float) src->u[3];
00943 }
00944 
00945 static void
00946 micro_xor(
00947    union tgsi_exec_channel *dst,
00948    const union tgsi_exec_channel *src0,
00949    const union tgsi_exec_channel *src1 )
00950 {
00951    dst->u[0] = src0->u[0] ^ src1->u[0];
00952    dst->u[1] = src0->u[1] ^ src1->u[1];
00953    dst->u[2] = src0->u[2] ^ src1->u[2];
00954    dst->u[3] = src0->u[3] ^ src1->u[3];
00955 }
00956 
00957 static void
00958 fetch_src_file_channel(
00959    const struct tgsi_exec_machine *mach,
00960    const uint file,
00961    const uint swizzle,
00962    const union tgsi_exec_channel *index,
00963    union tgsi_exec_channel *chan )
00964 {
00965    switch( swizzle ) {
00966    case TGSI_EXTSWIZZLE_X:
00967    case TGSI_EXTSWIZZLE_Y:
00968    case TGSI_EXTSWIZZLE_Z:
00969    case TGSI_EXTSWIZZLE_W:
00970       switch( file ) {
00971       case TGSI_FILE_CONSTANT:
00972          assert(mach->Consts);
00973          if (index->i[0] < 0)
00974             chan->f[0] = 0.0f;
00975          else
00976             chan->f[0] = mach->Consts[index->i[0]][swizzle];
00977          if (index->i[1] < 0)
00978             chan->f[1] = 0.0f;
00979          else
00980             chan->f[1] = mach->Consts[index->i[1]][swizzle];
00981          if (index->i[2] < 0)
00982             chan->f[2] = 0.0f;
00983          else
00984             chan->f[2] = mach->Consts[index->i[2]][swizzle];
00985          if (index->i[3] < 0)
00986             chan->f[3] = 0.0f;
00987          else
00988             chan->f[3] = mach->Consts[index->i[3]][swizzle];
00989          break;
00990 
00991       case TGSI_FILE_INPUT:
00992          chan->u[0] = mach->Inputs[index->i[0]].xyzw[swizzle].u[0];
00993          chan->u[1] = mach->Inputs[index->i[1]].xyzw[swizzle].u[1];
00994          chan->u[2] = mach->Inputs[index->i[2]].xyzw[swizzle].u[2];
00995          chan->u[3] = mach->Inputs[index->i[3]].xyzw[swizzle].u[3];
00996          break;
00997 
00998       case TGSI_FILE_TEMPORARY:
00999          assert(index->i[0] < TGSI_EXEC_NUM_TEMPS);
01000          chan->u[0] = mach->Temps[index->i[0]].xyzw[swizzle].u[0];
01001          chan->u[1] = mach->Temps[index->i[1]].xyzw[swizzle].u[1];
01002          chan->u[2] = mach->Temps[index->i[2]].xyzw[swizzle].u[2];
01003          chan->u[3] = mach->Temps[index->i[3]].xyzw[swizzle].u[3];
01004          break;
01005 
01006       case TGSI_FILE_IMMEDIATE:
01007          assert( index->i[0] < (int) mach->ImmLimit );
01008          chan->f[0] = mach->Imms[index->i[0]][swizzle];
01009          assert( index->i[1] < (int) mach->ImmLimit );
01010          chan->f[1] = mach->Imms[index->i[1]][swizzle];
01011          assert( index->i[2] < (int) mach->ImmLimit );
01012          chan->f[2] = mach->Imms[index->i[2]][swizzle];
01013          assert( index->i[3] < (int) mach->ImmLimit );
01014          chan->f[3] = mach->Imms[index->i[3]][swizzle];
01015          break;
01016 
01017       case TGSI_FILE_ADDRESS:
01018          chan->u[0] = mach->Addrs[index->i[0]].xyzw[swizzle].u[0];
01019          chan->u[1] = mach->Addrs[index->i[1]].xyzw[swizzle].u[1];
01020          chan->u[2] = mach->Addrs[index->i[2]].xyzw[swizzle].u[2];
01021          chan->u[3] = mach->Addrs[index->i[3]].xyzw[swizzle].u[3];
01022          break;
01023 
01024       case TGSI_FILE_OUTPUT:
01025          /* vertex/fragment output vars can be read too */
01026          chan->u[0] = mach->Outputs[index->i[0]].xyzw[swizzle].u[0];
01027          chan->u[1] = mach->Outputs[index->i[1]].xyzw[swizzle].u[1];
01028          chan->u[2] = mach->Outputs[index->i[2]].xyzw[swizzle].u[2];
01029          chan->u[3] = mach->Outputs[index->i[3]].xyzw[swizzle].u[3];
01030          break;
01031 
01032       default:
01033          assert( 0 );
01034       }
01035       break;
01036 
01037    case TGSI_EXTSWIZZLE_ZERO:
01038       *chan = mach->Temps[TEMP_0_I].xyzw[TEMP_0_C];
01039       break;
01040 
01041    case TGSI_EXTSWIZZLE_ONE:
01042       *chan = mach->Temps[TEMP_1_I].xyzw[TEMP_1_C];
01043       break;
01044 
01045    default:
01046       assert( 0 );
01047    }
01048 }
01049 
01050 static void
01051 fetch_source(
01052    const struct tgsi_exec_machine *mach,
01053    union tgsi_exec_channel *chan,
01054    const struct tgsi_full_src_register *reg,
01055    const uint chan_index )
01056 {
01057    union tgsi_exec_channel index;
01058    uint swizzle;
01059 
01060    index.i[0] =
01061    index.i[1] =
01062    index.i[2] =
01063    index.i[3] = reg->SrcRegister.Index;
01064 
01065    if (reg->SrcRegister.Indirect) {
01066       union tgsi_exec_channel index2;
01067       union tgsi_exec_channel indir_index;
01068       const uint execmask = mach->ExecMask;
01069       uint i;
01070 
01071       /* which address register (always zero now) */
01072       index2.i[0] =
01073       index2.i[1] =
01074       index2.i[2] =
01075       index2.i[3] = reg->SrcRegisterInd.Index;
01076 
01077       /* get current value of address register[swizzle] */
01078       swizzle = tgsi_util_get_src_register_swizzle( &reg->SrcRegisterInd, CHAN_X );
01079       fetch_src_file_channel(
01080          mach,
01081          reg->SrcRegisterInd.File,
01082          swizzle,
01083          &index2,
01084          &indir_index );
01085 
01086       /* add value of address register to the offset */
01087       index.i[0] += (int) indir_index.f[0];
01088       index.i[1] += (int) indir_index.f[1];
01089       index.i[2] += (int) indir_index.f[2];
01090       index.i[3] += (int) indir_index.f[3];
01091 
01092       /* for disabled execution channels, zero-out the index to
01093        * avoid using a potential garbage value.
01094        */
01095       for (i = 0; i < QUAD_SIZE; i++) {
01096          if ((execmask & (1 << i)) == 0)
01097             index.i[i] = 0;
01098       }
01099    }
01100 
01101    if( reg->SrcRegister.Dimension ) {
01102       switch( reg->SrcRegister.File ) {
01103       case TGSI_FILE_INPUT:
01104          index.i[0] *= 17;
01105          index.i[1] *= 17;
01106          index.i[2] *= 17;
01107          index.i[3] *= 17;
01108          break;
01109       case TGSI_FILE_CONSTANT:
01110          index.i[0] *= 4096;
01111          index.i[1] *= 4096;
01112          index.i[2] *= 4096;
01113          index.i[3] *= 4096;
01114          break;
01115       default:
01116          assert( 0 );
01117       }
01118 
01119       index.i[0] += reg->SrcRegisterDim.Index;
01120       index.i[1] += reg->SrcRegisterDim.Index;
01121       index.i[2] += reg->SrcRegisterDim.Index;
01122       index.i[3] += reg->SrcRegisterDim.Index;
01123 
01124       if (reg->SrcRegisterDim.Indirect) {
01125          union tgsi_exec_channel index2;
01126          union tgsi_exec_channel indir_index;
01127          const uint execmask = mach->ExecMask;
01128          uint i;
01129 
01130          index2.i[0] =
01131          index2.i[1] =
01132          index2.i[2] =
01133          index2.i[3] = reg->SrcRegisterDimInd.Index;
01134 
01135          swizzle = tgsi_util_get_src_register_swizzle( &reg->SrcRegisterDimInd, CHAN_X );
01136          fetch_src_file_channel(
01137             mach,
01138             reg->SrcRegisterDimInd.File,
01139             swizzle,
01140             &index2,
01141             &indir_index );
01142 
01143          index.i[0] += (int) indir_index.f[0];
01144          index.i[1] += (int) indir_index.f[1];
01145          index.i[2] += (int) indir_index.f[2];
01146          index.i[3] += (int) indir_index.f[3];
01147 
01148          /* for disabled execution channels, zero-out the index to
01149           * avoid using a potential garbage value.
01150           */
01151          for (i = 0; i < QUAD_SIZE; i++) {
01152             if ((execmask & (1 << i)) == 0)
01153                index.i[i] = 0;
01154          }
01155       }
01156    }
01157 
01158    swizzle = tgsi_util_get_full_src_register_extswizzle( reg, chan_index );
01159    fetch_src_file_channel(
01160       mach,
01161       reg->SrcRegister.File,
01162       swizzle,
01163       &index,
01164       chan );
01165 
01166    switch (tgsi_util_get_full_src_register_sign_mode( reg, chan_index )) {
01167    case TGSI_UTIL_SIGN_CLEAR:
01168       micro_abs( chan, chan );
01169       break;
01170 
01171    case TGSI_UTIL_SIGN_SET:
01172       micro_abs( chan, chan );
01173       micro_neg( chan, chan );
01174       break;
01175 
01176    case TGSI_UTIL_SIGN_TOGGLE:
01177       micro_neg( chan, chan );
01178       break;
01179 
01180    case TGSI_UTIL_SIGN_KEEP:
01181       break;
01182    }
01183 
01184    if (reg->SrcRegisterExtMod.Complement) {
01185       micro_sub( chan, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], chan );
01186    }
01187 }
01188 
01189 static void
01190 store_dest(
01191    struct tgsi_exec_machine *mach,
01192    const union tgsi_exec_channel *chan,
01193    const struct tgsi_full_dst_register *reg,
01194    const struct tgsi_full_instruction *inst,
01195    uint chan_index )
01196 {
01197    uint i;
01198    union tgsi_exec_channel null;
01199    union tgsi_exec_channel *dst;
01200    uint execmask = mach->ExecMask;
01201 
01202    switch (reg->DstRegister.File) {
01203    case TGSI_FILE_NULL:
01204       dst = &null;
01205       break;
01206 
01207    case TGSI_FILE_OUTPUT:
01208       dst = &mach->Outputs[mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0]
01209                            + reg->DstRegister.Index].xyzw[chan_index];
01210       break;
01211 
01212    case TGSI_FILE_TEMPORARY:
01213       assert( reg->DstRegister.Index < TGSI_EXEC_NUM_TEMPS );
01214       dst = &mach->Temps[reg->DstRegister.Index].xyzw[chan_index];
01215       break;
01216 
01217    case TGSI_FILE_ADDRESS:
01218       dst = &mach->Addrs[reg->DstRegister.Index].xyzw[chan_index];
01219       break;
01220 
01221    default:
01222       assert( 0 );
01223       return;
01224    }
01225 
01226    if (inst->InstructionExtNv.CondFlowEnable) {
01227       union tgsi_exec_channel *cc = &mach->Temps[TEMP_CC_I].xyzw[TEMP_CC_C];
01228       uint swizzle;
01229       uint shift;
01230       uint mask;
01231       uint test;
01232 
01233       /* Only CC0 supported.
01234        */
01235       assert( inst->InstructionExtNv.CondFlowIndex < 1 );
01236 
01237       switch (chan_index) {
01238       case CHAN_X:
01239          swizzle = inst->InstructionExtNv.CondSwizzleX;
01240          break;
01241       case CHAN_Y:
01242          swizzle = inst->InstructionExtNv.CondSwizzleY;
01243          break;
01244       case CHAN_Z:
01245          swizzle = inst->InstructionExtNv.CondSwizzleZ;
01246          break;
01247       case CHAN_W:
01248          swizzle = inst->InstructionExtNv.CondSwizzleW;
01249          break;
01250       default:
01251          assert( 0 );
01252          return;
01253       }
01254 
01255       switch (swizzle) {
01256       case TGSI_SWIZZLE_X:
01257          shift = TGSI_EXEC_CC_X_SHIFT;
01258          mask = TGSI_EXEC_CC_X_MASK;
01259          break;
01260       case TGSI_SWIZZLE_Y:
01261          shift = TGSI_EXEC_CC_Y_SHIFT;
01262          mask = TGSI_EXEC_CC_Y_MASK;
01263          break;
01264       case TGSI_SWIZZLE_Z:
01265          shift = TGSI_EXEC_CC_Z_SHIFT;
01266          mask = TGSI_EXEC_CC_Z_MASK;
01267          break;
01268       case TGSI_SWIZZLE_W:
01269          shift = TGSI_EXEC_CC_W_SHIFT;
01270          mask = TGSI_EXEC_CC_W_MASK;
01271          break;
01272       default:
01273          assert( 0 );
01274          return;
01275       }
01276 
01277       switch (inst->InstructionExtNv.CondMask) {
01278       case TGSI_CC_GT:
01279          test = ~(TGSI_EXEC_CC_GT << shift) & mask;
01280          for (i = 0; i < QUAD_SIZE; i++)
01281             if (cc->u[i] & test)
01282                execmask &= ~(1 << i);
01283          break;
01284 
01285       case TGSI_CC_EQ:
01286          test = ~(TGSI_EXEC_CC_EQ << shift) & mask;
01287          for (i = 0; i < QUAD_SIZE; i++)
01288             if (cc->u[i] & test)
01289                execmask &= ~(1 << i);
01290          break;
01291 
01292       case TGSI_CC_LT:
01293          test = ~(TGSI_EXEC_CC_LT << shift) & mask;
01294          for (i = 0; i < QUAD_SIZE; i++)
01295             if (cc->u[i] & test)
01296                execmask &= ~(1 << i);
01297          break;
01298 
01299       case TGSI_CC_GE:
01300          test = ~((TGSI_EXEC_CC_GT | TGSI_EXEC_CC_EQ) << shift) & mask;
01301          for (i = 0; i < QUAD_SIZE; i++)
01302             if (cc->u[i] & test)
01303                execmask &= ~(1 << i);
01304          break;
01305 
01306       case TGSI_CC_LE:
01307          test = ~((TGSI_EXEC_CC_LT | TGSI_EXEC_CC_EQ) << shift) & mask;
01308          for (i = 0; i < QUAD_SIZE; i++)
01309             if (cc->u[i] & test)
01310                execmask &= ~(1 << i);
01311          break;
01312 
01313       case TGSI_CC_NE:
01314          test = ~((TGSI_EXEC_CC_GT | TGSI_EXEC_CC_LT | TGSI_EXEC_CC_UN) << shift) & mask;
01315          for (i = 0; i < QUAD_SIZE; i++)
01316             if (cc->u[i] & test)
01317                execmask &= ~(1 << i);
01318          break;
01319 
01320       case TGSI_CC_TR:
01321          break;
01322 
01323       case TGSI_CC_FL:
01324          for (i = 0; i < QUAD_SIZE; i++)
01325             execmask &= ~(1 << i);
01326          break;
01327 
01328       default:
01329          assert( 0 );
01330          return;
01331       }
01332    }
01333 
01334    switch (inst->Instruction.Saturate) {
01335    case TGSI_SAT_NONE:
01336       for (i = 0; i < QUAD_SIZE; i++)
01337          if (execmask & (1 << i))
01338             dst->i[i] = chan->i[i];
01339       break;
01340 
01341    case TGSI_SAT_ZERO_ONE:
01342       for (i = 0; i < QUAD_SIZE; i++)
01343          if (execmask & (1 << i)) {
01344             if (chan->f[i] < 0.0f)
01345                dst->f[i] = 0.0f;
01346             else if (chan->f[i] > 1.0f)
01347                dst->f[i] = 1.0f;
01348             else
01349                dst->i[i] = chan->i[i];
01350          }
01351       break;
01352 
01353    case TGSI_SAT_MINUS_PLUS_ONE:
01354       for (i = 0; i < QUAD_SIZE; i++)
01355          if (execmask & (1 << i)) {
01356             if (chan->f[i] < -1.0f)
01357                dst->f[i] = -1.0f;
01358             else if (chan->f[i] > 1.0f)
01359                dst->f[i] = 1.0f;
01360             else
01361                dst->i[i] = chan->i[i];
01362          }
01363       break;
01364 
01365    default:
01366       assert( 0 );
01367    }
01368 
01369    if (inst->InstructionExtNv.CondDstUpdate) {
01370       union tgsi_exec_channel *cc = &mach->Temps[TEMP_CC_I].xyzw[TEMP_CC_C];
01371       uint shift;
01372       uint mask;
01373 
01374       /* Only CC0 supported.
01375        */
01376       assert( inst->InstructionExtNv.CondDstIndex < 1 );
01377 
01378       switch (chan_index) {
01379       case CHAN_X:
01380          shift = TGSI_EXEC_CC_X_SHIFT;
01381          mask = ~TGSI_EXEC_CC_X_MASK;
01382          break;
01383       case CHAN_Y:
01384          shift = TGSI_EXEC_CC_Y_SHIFT;
01385          mask = ~TGSI_EXEC_CC_Y_MASK;
01386          break;
01387       case CHAN_Z:
01388          shift = TGSI_EXEC_CC_Z_SHIFT;
01389          mask = ~TGSI_EXEC_CC_Z_MASK;
01390          break;
01391       case CHAN_W:
01392          shift = TGSI_EXEC_CC_W_SHIFT;
01393          mask = ~TGSI_EXEC_CC_W_MASK;
01394          break;
01395       default:
01396          assert( 0 );
01397          return;
01398       }
01399 
01400       for (i = 0; i < QUAD_SIZE; i++)
01401          if (execmask & (1 << i)) {
01402             cc->u[i] &= mask;
01403             if (dst->f[i] < 0.0f)
01404                cc->u[i] |= TGSI_EXEC_CC_LT << shift;
01405             else if (dst->f[i] > 0.0f)
01406                cc->u[i] |= TGSI_EXEC_CC_GT << shift;
01407             else if (dst->f[i] == 0.0f)
01408                cc->u[i] |= TGSI_EXEC_CC_EQ << shift;
01409             else
01410                cc->u[i] |= TGSI_EXEC_CC_UN << shift;
01411          }
01412    }
01413 }
01414 
/*
 * Operand access shorthands.  Both expand to code that refers to variables
 * named `mach` and `inst` in the scope where the macro is used.
 *
 * FETCH: load channel CHAN of source operand INDEX into *VAL.
 * STORE: write *VAL to channel CHAN of destination operand INDEX.
 */
#define FETCH(VAL, INDEX, CHAN) \
   fetch_source( mach, (VAL), &inst->FullSrcRegisters[INDEX], (CHAN) )

#define STORE(VAL, INDEX, CHAN) \
   store_dest( mach, (VAL), &inst->FullDstRegisters[INDEX], inst, (CHAN) )
01420 
01421 
01426 static void
01427 exec_kil(struct tgsi_exec_machine *mach,
01428          const struct tgsi_full_instruction *inst)
01429 {
01430    uint uniquemask;
01431    uint chan_index;
01432    uint kilmask = 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */
01433    union tgsi_exec_channel r[1];
01434 
01435    /* This mask stores component bits that were already tested. Note that
01436     * we test if the value is less than zero, so 1.0 and 0.0 need not to be
01437     * tested. */
01438    uniquemask = (1 << TGSI_EXTSWIZZLE_ZERO) | (1 << TGSI_EXTSWIZZLE_ONE);
01439 
01440    for (chan_index = 0; chan_index < 4; chan_index++)
01441    {
01442       uint swizzle;
01443       uint i;
01444 
01445       /* unswizzle channel */
01446       swizzle = tgsi_util_get_full_src_register_extswizzle (
01447                         &inst->FullSrcRegisters[0],
01448                         chan_index);
01449 
01450       /* check if the component has not been already tested */
01451       if (uniquemask & (1 << swizzle))
01452          continue;
01453       uniquemask |= 1 << swizzle;
01454 
01455       FETCH(&r[0], 0, chan_index);
01456       for (i = 0; i < 4; i++)
01457          if (r[0].f[i] < 0.0f)
01458             kilmask |= 1 << i;
01459    }
01460 
01461    mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask;
01462 }
01463 
01468 static void
01469 exec_kilp(struct tgsi_exec_machine *mach,
01470           const struct tgsi_full_instruction *inst)
01471 {
01472    uint kilmask; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */
01473 
01474    if (inst->InstructionExtNv.CondFlowEnable) {
01475       uint swizzle[4];
01476       uint chan_index;
01477 
01478       kilmask = 0x0;
01479 
01480       swizzle[0] = inst->InstructionExtNv.CondSwizzleX;
01481       swizzle[1] = inst->InstructionExtNv.CondSwizzleY;
01482       swizzle[2] = inst->InstructionExtNv.CondSwizzleZ;
01483       swizzle[3] = inst->InstructionExtNv.CondSwizzleW;
01484 
01485       for (chan_index = 0; chan_index < 4; chan_index++)
01486       {
01487          uint i;
01488 
01489          for (i = 0; i < 4; i++) {
01490             /* TODO: evaluate the condition code */
01491             if (0)
01492                kilmask |= 1 << i;
01493          }
01494       }
01495    }
01496    else {
01497       /* "unconditional" kil */
01498       kilmask = mach->ExecMask;
01499    }
01500    mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask;
01501 }
01502 
01503 
01504 /*
01505  * Fetch a texel using STR texture coordinates.
01506  */
01507 static void
01508 fetch_texel( struct tgsi_sampler *sampler,
01509              const union tgsi_exec_channel *s,
01510              const union tgsi_exec_channel *t,
01511              const union tgsi_exec_channel *p,
01512              float lodbias,  /* XXX should be float[4] */
01513              union tgsi_exec_channel *r,
01514              union tgsi_exec_channel *g,
01515              union tgsi_exec_channel *b,
01516              union tgsi_exec_channel *a )
01517 {
01518    uint j;
01519    float rgba[NUM_CHANNELS][QUAD_SIZE];
01520 
01521    sampler->get_samples(sampler, s->f, t->f, p->f, lodbias, rgba);
01522 
01523    for (j = 0; j < 4; j++) {
01524       r->f[j] = rgba[0][j];
01525       g->f[j] = rgba[1][j];
01526       b->f[j] = rgba[2][j];
01527       a->f[j] = rgba[3][j];
01528    }
01529 }
01530 
01531 
01532 static void
01533 exec_tex(struct tgsi_exec_machine *mach,
01534          const struct tgsi_full_instruction *inst,
01535          boolean biasLod,
01536          boolean projected)
01537 {
01538    const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index;
01539    union tgsi_exec_channel r[8];
01540    uint chan_index;
01541    float lodBias;
01542 
01543    /*   debug_printf("Sampler %u unit %u\n", sampler, unit); */
01544 
01545    switch (inst->InstructionExtTexture.Texture) {
01546    case TGSI_TEXTURE_1D:
01547 
01548       FETCH(&r[0], 0, CHAN_X);
01549 
01550       if (projected) {
01551          FETCH(&r[1], 0, CHAN_W);
01552          micro_div( &r[0], &r[0], &r[1] );
01553       }
01554 
01555       if (biasLod) {
01556          FETCH(&r[1], 0, CHAN_W);
01557          lodBias = r[2].f[0];
01558       }
01559       else
01560          lodBias = 0.0;
01561 
01562       fetch_texel(&mach->Samplers[unit],
01563                   &r[0], NULL, NULL, lodBias,  /* S, T, P, BIAS */
01564                   &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */
01565       break;
01566 
01567    case TGSI_TEXTURE_2D:
01568    case TGSI_TEXTURE_RECT:
01569 
01570       FETCH(&r[0], 0, CHAN_X);
01571       FETCH(&r[1], 0, CHAN_Y);
01572       FETCH(&r[2], 0, CHAN_Z);
01573 
01574       if (projected) {
01575          FETCH(&r[3], 0, CHAN_W);
01576          micro_div( &r[0], &r[0], &r[3] );
01577          micro_div( &r[1], &r[1], &r[3] );
01578          micro_div( &r[2], &r[2], &r[3] );
01579       }
01580 
01581       if (biasLod) {
01582          FETCH(&r[3], 0, CHAN_W);
01583          lodBias = r[3].f[0];
01584       }
01585       else
01586          lodBias = 0.0;
01587 
01588       fetch_texel(&mach->Samplers[unit],
01589                   &r[0], &r[1], &r[2], lodBias,  /* inputs */
01590                   &r[0], &r[1], &r[2], &r[3]);  /* outputs */
01591       break;
01592 
01593    case TGSI_TEXTURE_3D:
01594    case TGSI_TEXTURE_CUBE:
01595 
01596       FETCH(&r[0], 0, CHAN_X);
01597       FETCH(&r[1], 0, CHAN_Y);
01598       FETCH(&r[2], 0, CHAN_Z);
01599 
01600       if (projected) {
01601          FETCH(&r[3], 0, CHAN_W);
01602          micro_div( &r[0], &r[0], &r[3] );
01603          micro_div( &r[1], &r[1], &r[3] );
01604          micro_div( &r[2], &r[2], &r[3] );
01605       }
01606 
01607       if (biasLod) {
01608          FETCH(&r[3], 0, CHAN_W);
01609          lodBias = r[3].f[0];
01610       }
01611       else
01612          lodBias = 0.0;
01613 
01614       fetch_texel(&mach->Samplers[unit],
01615                   &r[0], &r[1], &r[2], lodBias,
01616                   &r[0], &r[1], &r[2], &r[3]);
01617       break;
01618 
01619    default:
01620       assert (0);
01621    }
01622 
01623    FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
01624       STORE( &r[chan_index], 0, chan_index );
01625    }
01626 }
01627 
01628 
01633 static void
01634 eval_constant_coef(
01635    struct tgsi_exec_machine *mach,
01636    unsigned attrib,
01637    unsigned chan )
01638 {
01639    unsigned i;
01640 
01641    for( i = 0; i < QUAD_SIZE; i++ ) {
01642       mach->Inputs[attrib].xyzw[chan].f[i] = mach->InterpCoefs[attrib].a0[chan];
01643    }
01644 }
01645 
01650 static void
01651 eval_linear_coef(
01652    struct tgsi_exec_machine *mach,
01653    unsigned attrib,
01654    unsigned chan )
01655 {
01656    const float x = mach->QuadPos.xyzw[0].f[0];
01657    const float y = mach->QuadPos.xyzw[1].f[0];
01658    const float dadx = mach->InterpCoefs[attrib].dadx[chan];
01659    const float dady = mach->InterpCoefs[attrib].dady[chan];
01660    const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y;
01661    mach->Inputs[attrib].xyzw[chan].f[0] = a0;
01662    mach->Inputs[attrib].xyzw[chan].f[1] = a0 + dadx;
01663    mach->Inputs[attrib].xyzw[chan].f[2] = a0 + dady;
01664    mach->Inputs[attrib].xyzw[chan].f[3] = a0 + dadx + dady;
01665 }
01666 
01671 static void
01672 eval_perspective_coef(
01673    struct tgsi_exec_machine *mach,
01674    unsigned attrib,
01675    unsigned chan )
01676 {
01677    const float x = mach->QuadPos.xyzw[0].f[0];
01678    const float y = mach->QuadPos.xyzw[1].f[0];
01679    const float dadx = mach->InterpCoefs[attrib].dadx[chan];
01680    const float dady = mach->InterpCoefs[attrib].dady[chan];
01681    const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y;
01682    const float *w = mach->QuadPos.xyzw[3].f;
01683    /* divide by W here */
01684    mach->Inputs[attrib].xyzw[chan].f[0] = a0 / w[0];
01685    mach->Inputs[attrib].xyzw[chan].f[1] = (a0 + dadx) / w[1];
01686    mach->Inputs[attrib].xyzw[chan].f[2] = (a0 + dady) / w[2];
01687    mach->Inputs[attrib].xyzw[chan].f[3] = (a0 + dadx + dady) / w[3];
01688 }
01689 
01690 
/**
 * Signature of an input-attribute evaluator: computes channel `chan` of
 * input attribute `attrib` for the current quad and writes it to the
 * machine's input registers.
 */
typedef void (*eval_coef_func)( struct tgsi_exec_machine *mach,
                                unsigned attrib,
                                unsigned chan );
01695 
01696 static void
01697 exec_declaration(
01698    struct tgsi_exec_machine *mach,
01699    const struct tgsi_full_declaration *decl )
01700 {
01701    if( mach->Processor == TGSI_PROCESSOR_FRAGMENT ) {
01702       if( decl->Declaration.File == TGSI_FILE_INPUT ) {
01703          unsigned first, last, mask;
01704          eval_coef_func eval;
01705 
01706          first = decl->DeclarationRange.First;
01707          last = decl->DeclarationRange.Last;
01708          mask = decl->Declaration.UsageMask;
01709 
01710          switch( decl->Declaration.Interpolate ) {
01711          case TGSI_INTERPOLATE_CONSTANT:
01712             eval = eval_constant_coef;
01713             break;
01714 
01715          case TGSI_INTERPOLATE_LINEAR:
01716             eval = eval_linear_coef;
01717             break;
01718 
01719          case TGSI_INTERPOLATE_PERSPECTIVE:
01720             eval = eval_perspective_coef;
01721             break;
01722 
01723          default:
01724             assert( 0 );
01725          }
01726 
01727          if( mask == TGSI_WRITEMASK_XYZW ) {
01728             unsigned i, j;
01729 
01730             for( i = first; i <= last; i++ ) {
01731                for( j = 0; j < NUM_CHANNELS; j++ ) {
01732                   eval( mach, i, j );
01733                }
01734             }
01735          }
01736          else {
01737             unsigned i, j;
01738 
01739             for( j = 0; j < NUM_CHANNELS; j++ ) {
01740                if( mask & (1 << j) ) {
01741                   for( i = first; i <= last; i++ ) {
01742                      eval( mach, i, j );
01743                   }
01744                }
01745             }
01746          }
01747       }
01748    }
01749 }
01750 
01751 static void
01752 exec_instruction(
01753    struct tgsi_exec_machine *mach,
01754    const struct tgsi_full_instruction *inst,
01755    int *pc )
01756 {
01757    uint chan_index;
01758    union tgsi_exec_channel r[10];
01759 
01760    (*pc)++;
01761 
01762    switch (inst->Instruction.Opcode) {
01763    case TGSI_OPCODE_ARL:
01764    case TGSI_OPCODE_FLOOR:
01765    /* TGSI_OPCODE_FLR */
01766       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
01767          FETCH( &r[0], 0, chan_index );
01768          micro_flr(&r[0], &r[0]);
01769          STORE( &r[0], 0, chan_index );
01770       }
01771       break;
01772 
01773    case TGSI_OPCODE_MOV:
01774    case TGSI_OPCODE_SWZ:
01775       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
01776          FETCH( &r[0], 0, chan_index );
01777          STORE( &r[0], 0, chan_index );
01778       }
01779       break;
01780 
01781    case TGSI_OPCODE_LIT:
01782       if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
01783          STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X );
01784       }
01785 
01786       if (IS_CHANNEL_ENABLED( *inst, CHAN_Y ) || IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
01787          FETCH( &r[0], 0, CHAN_X );
01788          if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
01789             micro_max( &r[0], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
01790             STORE( &r[0], 0, CHAN_Y );
01791          }
01792 
01793          if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
01794             FETCH( &r[1], 0, CHAN_Y );
01795             micro_max( &r[1], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
01796 
01797             FETCH( &r[2], 0, CHAN_W );
01798             micro_min( &r[2], &r[2], &mach->Temps[TEMP_128_I].xyzw[TEMP_128_C] );
01799             micro_max( &r[2], &r[2], &mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C] );
01800             micro_pow( &r[1], &r[1], &r[2] );
01801             micro_lt( &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
01802             STORE( &r[0], 0, CHAN_Z );
01803          }
01804       }
01805 
01806       if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
01807          STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
01808       }
01809       break;
01810 
01811    case TGSI_OPCODE_RCP:
01812    /* TGSI_OPCODE_RECIP */
01813       FETCH( &r[0], 0, CHAN_X );
01814       micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] );
01815       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
01816          STORE( &r[0], 0, chan_index );
01817       }
01818       break;
01819 
01820    case TGSI_OPCODE_RSQ:
01821    /* TGSI_OPCODE_RECIPSQRT */
01822       FETCH( &r[0], 0, CHAN_X );
01823       micro_sqrt( &r[0], &r[0] );
01824       micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] );
01825       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
01826          STORE( &r[0], 0, chan_index );
01827       }
01828       break;
01829 
01830    case TGSI_OPCODE_EXP:
01831       FETCH( &r[0], 0, CHAN_X );
01832       micro_flr( &r[1], &r[0] );  /* r1 = floor(r0) */
01833       if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
01834          micro_exp2( &r[2], &r[1] );       /* r2 = 2 ^ r1 */
01835          STORE( &r[2], 0, CHAN_X );        /* store r2 */
01836       }
01837       if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
01838          micro_sub( &r[2], &r[0], &r[1] ); /* r2 = r0 - r1 */
01839          STORE( &r[2], 0, CHAN_Y );        /* store r2 */
01840       }
01841       if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
01842          micro_exp2( &r[2], &r[0] );       /* r2 = 2 ^ r0 */
01843          STORE( &r[2], 0, CHAN_Z );        /* store r2 */
01844       }
01845       if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
01846          STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
01847       }
01848       break;
01849 
01850    case TGSI_OPCODE_LOG:
01851       FETCH( &r[0], 0, CHAN_X );
01852       micro_abs( &r[2], &r[0] );  /* r2 = abs(r0) */
01853       micro_lg2( &r[1], &r[2] );  /* r1 = lg2(r2) */
01854       micro_flr( &r[0], &r[1] );  /* r0 = floor(r1) */
01855       if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
01856          STORE( &r[0], 0, CHAN_X );
01857       }
01858       if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
01859          micro_exp2( &r[0], &r[0] );       /* r0 = 2 ^ r0 */
01860          micro_div( &r[0], &r[2], &r[0] ); /* r0 = r2 / r0 */
01861          STORE( &r[0], 0, CHAN_Y );
01862       }
01863       if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
01864          STORE( &r[1], 0, CHAN_Z );
01865       }
01866       if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
01867          STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
01868       }
01869       break;
01870 
01871    case TGSI_OPCODE_MUL:
01872       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index )
01873       {
01874          FETCH(&r[0], 0, chan_index);
01875          FETCH(&r[1], 1, chan_index);
01876 
01877          micro_mul( &r[0], &r[0], &r[1] );
01878 
01879          STORE(&r[0], 0, chan_index);
01880       }
01881       break;
01882 
01883    case TGSI_OPCODE_ADD:
01884       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
01885          FETCH( &r[0], 0, chan_index );
01886          FETCH( &r[1], 1, chan_index );
01887          micro_add( &r[0], &r[0], &r[1] );
01888          STORE( &r[0], 0, chan_index );
01889       }
01890       break;
01891 
01892    case TGSI_OPCODE_DP3:
01893    /* TGSI_OPCODE_DOT3 */
01894       FETCH( &r[0], 0, CHAN_X );
01895       FETCH( &r[1], 1, CHAN_X );
01896       micro_mul( &r[0], &r[0], &r[1] );
01897 
01898       FETCH( &r[1], 0, CHAN_Y );
01899       FETCH( &r[2], 1, CHAN_Y );
01900       micro_mul( &r[1], &r[1], &r[2] );
01901       micro_add( &r[0], &r[0], &r[1] );
01902 
01903       FETCH( &r[1], 0, CHAN_Z );
01904       FETCH( &r[2], 1, CHAN_Z );
01905       micro_mul( &r[1], &r[1], &r[2] );
01906       micro_add( &r[0], &r[0], &r[1] );
01907 
01908       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
01909          STORE( &r[0], 0, chan_index );
01910       }
01911       break;
01912 
01913     case TGSI_OPCODE_DP4:
01914     /* TGSI_OPCODE_DOT4 */
01915        FETCH(&r[0], 0, CHAN_X);
01916        FETCH(&r[1], 1, CHAN_X);
01917 
01918        micro_mul( &r[0], &r[0], &r[1] );
01919 
01920        FETCH(&r[1], 0, CHAN_Y);
01921        FETCH(&r[2], 1, CHAN_Y);
01922 
01923        micro_mul( &r[1], &r[1], &r[2] );
01924        micro_add( &r[0], &r[0], &r[1] );
01925 
01926        FETCH(&r[1], 0, CHAN_Z);
01927        FETCH(&r[2], 1, CHAN_Z);
01928 
01929        micro_mul( &r[1], &r[1], &r[2] );
01930        micro_add( &r[0], &r[0], &r[1] );
01931 
01932        FETCH(&r[1], 0, CHAN_W);
01933        FETCH(&r[2], 1, CHAN_W);
01934 
01935        micro_mul( &r[1], &r[1], &r[2] );
01936        micro_add( &r[0], &r[0], &r[1] );
01937 
01938       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
01939          STORE( &r[0], 0, chan_index );
01940       }
01941       break;
01942 
01943    case TGSI_OPCODE_DST:
01944       if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
01945          STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X );
01946       }
01947 
01948       if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
01949          FETCH( &r[0], 0, CHAN_Y );
01950          FETCH( &r[1], 1, CHAN_Y);
01951          micro_mul( &r[0], &r[0], &r[1] );
01952          STORE( &r[0], 0, CHAN_Y );
01953       }
01954 
01955       if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
01956          FETCH( &r[0], 0, CHAN_Z );
01957          STORE( &r[0], 0, CHAN_Z );
01958       }
01959 
01960       if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
01961          FETCH( &r[0], 1, CHAN_W );
01962          STORE( &r[0], 0, CHAN_W );
01963       }
01964       break;
01965 
01966    case TGSI_OPCODE_MIN:
01967       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
01968          FETCH(&r[0], 0, chan_index);
01969          FETCH(&r[1], 1, chan_index);
01970 
01971          /* XXX use micro_min()?? */
01972          micro_lt( &r[0], &r[0], &r[1], &r[0], &r[1] );
01973 
01974          STORE(&r[0], 0, chan_index);
01975       }
01976       break;
01977 
01978    case TGSI_OPCODE_MAX:
01979       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
01980          FETCH(&r[0], 0, chan_index);
01981          FETCH(&r[1], 1, chan_index);
01982 
01983          /* XXX use micro_max()?? */
01984          micro_lt( &r[0], &r[0], &r[1], &r[1], &r[0] );
01985 
01986          STORE(&r[0], 0, chan_index );
01987       }
01988       break;
01989 
01990    case TGSI_OPCODE_SLT:
01991    /* TGSI_OPCODE_SETLT */
01992       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
01993          FETCH( &r[0], 0, chan_index );
01994          FETCH( &r[1], 1, chan_index );
01995          micro_lt( &r[0], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
01996          STORE( &r[0], 0, chan_index );
01997       }
01998       break;
01999 
02000    case TGSI_OPCODE_SGE:
02001    /* TGSI_OPCODE_SETGE */
02002       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
02003          FETCH( &r[0], 0, chan_index );
02004          FETCH( &r[1], 1, chan_index );
02005          micro_le( &r[0], &r[1], &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
02006          STORE( &r[0], 0, chan_index );
02007       }
02008       break;
02009 
02010    case TGSI_OPCODE_MAD:
02011    /* TGSI_OPCODE_MADD */
02012       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
02013          FETCH( &r[0], 0, chan_index );
02014          FETCH( &r[1], 1, chan_index );
02015          micro_mul( &r[0], &r[0], &r[1] );
02016          FETCH( &r[1], 2, chan_index );
02017          micro_add( &r[0], &r[0], &r[1] );
02018          STORE( &r[0], 0, chan_index );
02019       }
02020       break;
02021 
02022    case TGSI_OPCODE_SUB:
02023       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
02024          FETCH(&r[0], 0, chan_index);
02025          FETCH(&r[1], 1, chan_index);
02026 
02027          micro_sub( &r[0], &r[0], &r[1] );
02028 
02029          STORE(&r[0], 0, chan_index);
02030       }
02031       break;
02032 
02033    case TGSI_OPCODE_LERP:
02034    /* TGSI_OPCODE_LRP */
02035       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
02036          FETCH(&r[0], 0, chan_index);
02037          FETCH(&r[1], 1, chan_index);
02038          FETCH(&r[2], 2, chan_index);
02039 
02040          micro_sub( &r[1], &r[1], &r[2] );
02041          micro_mul( &r[0], &r[0], &r[1] );
02042          micro_add( &r[0], &r[0], &r[2] );
02043 
02044          STORE(&r[0], 0, chan_index);
02045       }
02046       break;
02047 
02048    case TGSI_OPCODE_CND:
02049       FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
02050          FETCH(&r[0], 0, chan_index);
02051          FETCH(&r[1], 1, chan_index);
02052          FETCH(&r[2], 2, chan_index);
02053          micro_lt(&r[0], &mach->Temps[TEMP_HALF_I].xyzw[TEMP_HALF_C], &r[2], &r[0], &r[1]);
02054          STORE(&r[0], 0, chan_index);
02055       }
02056       break;
02057 
02058    case TGSI_OPCODE_CND0:
02059       FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
02060          FETCH(&r[0], 0, chan_index);
02061          FETCH(&r[1], 1, chan_index);
02062          FETCH(&r[2], 2, chan_index);
02063          micro_le(&r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[2], &r[0], &r[1]);
02064          STORE(&r[0], 0, chan_index);
02065       }
02066       break;
02067 
02068    case TGSI_OPCODE_DOT2ADD:
02069    /* TGSI_OPCODE_DP2A */
02070       FETCH( &r[0], 0, CHAN_X );
02071       FETCH( &r[1], 1, CHAN_X );
02072       micro_mul( &r[0], &r[0], &r[1] );
02073 
02074       FETCH( &r[1], 0, CHAN_Y );
02075       FETCH( &r[2], 1, CHAN_Y );
02076       micro_mul( &r[1], &r[1], &r[2] );
02077       micro_add( &r[0], &r[0], &r[1] );
02078 
02079       FETCH( &r[2], 2, CHAN_X );
02080       micro_add( &r[0], &r[0], &r[2] );
02081 
02082       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
02083          STORE( &r[0], 0, chan_index );
02084       }
02085       break;
02086 
02087    case TGSI_OPCODE_INDEX:
02088       assert (0);
02089       break;
02090 
02091    case TGSI_OPCODE_NEGATE:
02092       assert (0);
02093       break;
02094 
02095    case TGSI_OPCODE_FRAC:
02096    /* TGSI_OPCODE_FRC */
02097       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
02098          FETCH( &r[0], 0, chan_index );
02099          micro_frc( &r[0], &r[0] );
02100          STORE( &r[0], 0, chan_index );
02101       }
02102       break;
02103 
02104    case TGSI_OPCODE_CLAMP:
02105       FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
02106          FETCH(&r[0], 0, chan_index);
02107          FETCH(&r[1], 1, chan_index);
02108          micro_max(&r[0], &r[0], &r[1]);
02109          FETCH(&r[1], 2, chan_index);
02110          micro_min(&r[0], &r[0], &r[1]);
02111          STORE(&r[0], 0, chan_index);
02112       }
02113       break;
02114 
02115    case TGSI_OPCODE_ROUND:
02116    case TGSI_OPCODE_ARR:
02117       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
02118          FETCH( &r[0], 0, chan_index );
02119          micro_rnd( &r[0], &r[0] );
02120          STORE( &r[0], 0, chan_index );
02121       }
02122       break;
02123 
02124    case TGSI_OPCODE_EXPBASE2:
02125    /* TGSI_OPCODE_EX2 */
02126       FETCH(&r[0], 0, CHAN_X);
02127 
02128 #if FAST_MATH
02129       micro_exp2( &r[0], &r[0] );
02130 #else
02131       micro_pow( &r[0], &mach->Temps[TEMP_2_I].xyzw[TEMP_2_C], &r[0] );
02132 #endif
02133 
02134       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
02135          STORE( &r[0], 0, chan_index );
02136       }
02137       break;
02138 
02139    case TGSI_OPCODE_LOGBASE2:
02140    /* TGSI_OPCODE_LG2 */
02141       FETCH( &r[0], 0, CHAN_X );
02142       micro_lg2( &r[0], &r[0] );
02143       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
02144          STORE( &r[0], 0, chan_index );
02145       }
02146       break;
02147 
02148    case TGSI_OPCODE_POWER:
02149    /* TGSI_OPCODE_POW */
02150       FETCH(&r[0], 0, CHAN_X);
02151       FETCH(&r[1], 1, CHAN_X);
02152 
02153       micro_pow( &r[0], &r[0], &r[1] );
02154 
02155       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
02156          STORE( &r[0], 0, chan_index );
02157       }
02158       break;
02159 
02160    case TGSI_OPCODE_CROSSPRODUCT:
02161    /* TGSI_OPCODE_XPD */
02162       FETCH(&r[0], 0, CHAN_Y);
02163       FETCH(&r[1], 1, CHAN_Z);
02164 
02165       micro_mul( &r[2], &r[0], &r[1] );
02166 
02167       FETCH(&r[3], 0, CHAN_Z);
02168       FETCH(&r[4], 1, CHAN_Y);
02169 
02170       micro_mul( &r[5], &r[3], &r[4] );
02171       micro_sub( &r[2], &r[2], &r[5] );
02172 
02173       if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
02174          STORE( &r[2], 0, CHAN_X );
02175       }
02176 
02177       FETCH(&r[2], 1, CHAN_X);
02178 
02179       micro_mul( &r[3], &r[3], &r[2] );
02180 
02181       FETCH(&r[5], 0, CHAN_X);
02182 
02183       micro_mul( &r[1], &r[1], &r[5] );
02184       micro_sub( &r[3], &r[3], &r[1] );
02185 
02186       if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
02187          STORE( &r[3], 0, CHAN_Y );
02188       }
02189 
02190       micro_mul( &r[5], &r[5], &r[4] );
02191       micro_mul( &r[0], &r[0], &r[2] );
02192       micro_sub( &r[5], &r[5], &r[0] );
02193 
02194       if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
02195          STORE( &r[5], 0, CHAN_Z );
02196       }
02197 
02198       if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
02199          STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
02200       }
02201       break;
02202 
02203     case TGSI_OPCODE_MULTIPLYMATRIX:
02204        assert (0);
02205        break;
02206 
02207     case TGSI_OPCODE_ABS:
02208        FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
02209           FETCH(&r[0], 0, chan_index);
02210 
02211           micro_abs( &r[0], &r[0] );
02212 
02213           STORE(&r[0], 0, chan_index);
02214        }
02215        break;
02216 
02217    case TGSI_OPCODE_RCC:
02218       FETCH(&r[0], 0, CHAN_X);
02219       micro_div(&r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0]);
02220       micro_float_clamp(&r[0], &r[0]);
02221       FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
02222          STORE(&r[0], 0, chan_index);
02223       }
02224       break;
02225 
02226    case TGSI_OPCODE_DPH:
02227       FETCH(&r[0], 0, CHAN_X);
02228       FETCH(&r[1], 1, CHAN_X);
02229 
02230       micro_mul( &r[0], &r[0], &r[1] );
02231 
02232       FETCH(&r[1], 0, CHAN_Y);
02233       FETCH(&r[2], 1, CHAN_Y);
02234 
02235       micro_mul( &r[1], &r[1], &r[2] );
02236       micro_add( &r[0], &r[0], &r[1] );
02237 
02238       FETCH(&r[1], 0, CHAN_Z);
02239       FETCH(&r[2], 1, CHAN_Z);
02240 
02241       micro_mul( &r[1], &r[1], &r[2] );
02242       micro_add( &r[0], &r[0], &r[1] );
02243 
02244       FETCH(&r[1], 1, CHAN_W);
02245 
02246       micro_add( &r[0], &r[0], &r[1] );
02247 
02248       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
02249          STORE( &r[0], 0, chan_index );
02250       }
02251       break;
02252 
02253    case TGSI_OPCODE_COS:
02254       FETCH(&r[0], 0, CHAN_X);
02255 
02256       micro_cos( &r[0], &r[0] );
02257 
02258       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
02259          STORE( &r[0], 0, chan_index );
02260       }
02261       break;
02262 
02263    case TGSI_OPCODE_DDX:
02264       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
02265          FETCH( &r[0], 0, chan_index );
02266          micro_ddx( &r[0], &r[0] );
02267          STORE( &r[0], 0, chan_index );
02268       }
02269       break;
02270 
02271    case TGSI_OPCODE_DDY:
02272       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
02273          FETCH( &r[0], 0, chan_index );
02274          micro_ddy( &r[0], &r[0] );
02275          STORE( &r[0], 0, chan_index );
02276       }
02277       break;
02278 
02279    case TGSI_OPCODE_KILP:
02280       exec_kilp (mach, inst);
02281       break;
02282 
02283    case TGSI_OPCODE_KIL:
02284       exec_kil (mach, inst);
02285       break;
02286 
02287    case TGSI_OPCODE_PK2H:
02288       assert (0);
02289       break;
02290 
02291    case TGSI_OPCODE_PK2US:
02292       assert (0);
02293       break;
02294 
02295    case TGSI_OPCODE_PK4B:
02296       assert (0);
02297       break;
02298 
02299    case TGSI_OPCODE_PK4UB:
02300       assert (0);
02301       break;
02302 
02303    case TGSI_OPCODE_RFL:
02304       if (IS_CHANNEL_ENABLED(*inst, CHAN_X) ||
02305           IS_CHANNEL_ENABLED(*inst, CHAN_Y) ||
02306           IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
02307          /* r0 = dp3(src0, src0) */
02308          FETCH(&r[2], 0, CHAN_X);
02309          micro_mul(&r[0], &r[2], &r[2]);
02310          FETCH(&r[4], 0, CHAN_Y);
02311          micro_mul(&r[8], &r[4], &r[4]);
02312          micro_add(&r[0], &r[0], &r[8]);
02313          FETCH(&r[6], 0, CHAN_Z);
02314          micro_mul(&r[8], &r[6], &r[6]);
02315          micro_add(&r[0], &r[0], &r[8]);
02316 
02317          /* r1 = dp3(src0, src1) */
02318          FETCH(&r[3], 1, CHAN_X);
02319          micro_mul(&r[1], &r[2], &r[3]);
02320          FETCH(&r[5], 1, CHAN_Y);
02321          micro_mul(&r[8], &r[4], &r[5]);
02322          micro_add(&r[1], &r[1], &r[8]);
02323          FETCH(&r[7], 1, CHAN_Z);
02324          micro_mul(&r[8], &r[6], &r[7]);
02325          micro_add(&r[1], &r[1], &r[8]);
02326 
02327          /* r1 = 2 * r1 / r0 */
02328          micro_add(&r[1], &r[1], &r[1]);
02329          micro_div(&r[1], &r[1], &r[0]);
02330 
02331          if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) {
02332             micro_mul(&r[2], &r[2], &r[1]);
02333             micro_sub(&r[2], &r[2], &r[3]);
02334             STORE(&r[2], 0, CHAN_X);
02335          }
02336          if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) {
02337             micro_mul(&r[4], &r[4], &r[1]);
02338             micro_sub(&r[4], &r[4], &r[5]);
02339             STORE(&r[4], 0, CHAN_Y);
02340          }
02341          if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
02342             micro_mul(&r[6], &r[6], &r[1]);
02343             micro_sub(&r[6], &r[6], &r[7]);
02344             STORE(&r[6], 0, CHAN_Z);
02345          }
02346       }
02347       if (IS_CHANNEL_ENABLED(*inst, CHAN_W)) {
02348          STORE(&mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W);
02349       }
02350       break;
02351 
02352    case TGSI_OPCODE_SEQ:
02353       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
02354          FETCH( &r[0], 0, chan_index );
02355          FETCH( &r[1], 1, chan_index );
02356          micro_eq( &r[0], &r[0], &r[1],
02357                    &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C],
02358                    &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
02359          STORE( &r[0], 0, chan_index );
02360       }
02361       break;
02362 
02363    case TGSI_OPCODE_SFL:
02364       FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
02365          STORE(&mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], 0, chan_index);
02366       }
02367       break;
02368 
02369    case TGSI_OPCODE_SGT:
02370       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
02371          FETCH( &r[0], 0, chan_index );
02372          FETCH( &r[1], 1, chan_index );
02373          micro_le( &r[0], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] );
02374          STORE( &r[0], 0, chan_index );
02375       }
02376       break;
02377 
02378    case TGSI_OPCODE_SIN:
02379       FETCH( &r[0], 0, CHAN_X );
02380       micro_sin( &r[0], &r[0] );
02381       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
02382          STORE( &r[0], 0, chan_index );
02383       }
02384       break;
02385 
02386    case TGSI_OPCODE_SLE:
02387       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
02388          FETCH( &r[0], 0, chan_index );
02389          FETCH( &r[1], 1, chan_index );
02390          micro_le( &r[0], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
02391          STORE( &r[0], 0, chan_index );
02392       }
02393       break;
02394 
02395    case TGSI_OPCODE_SNE:
02396       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
02397          FETCH( &r[0], 0, chan_index );
02398          FETCH( &r[1], 1, chan_index );
02399          micro_eq( &r[0], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] );
02400          STORE( &r[0], 0, chan_index );
02401       }
02402       break;
02403 
02404    case TGSI_OPCODE_STR:
02405       FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
02406          STORE(&mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, chan_index);
02407       }
02408       break;
02409 
02410    case TGSI_OPCODE_TEX:
02411       /* simple texture lookup */
02412       /* src[0] = texcoord */
02413       /* src[1] = sampler unit */
02414       exec_tex(mach, inst, FALSE, FALSE);
02415       break;
02416 
02417    case TGSI_OPCODE_TXB:
02418       /* Texture lookup with lod bias */
02419       /* src[0] = texcoord (src[0].w = LOD bias) */
02420       /* src[1] = sampler unit */
02421       exec_tex(mach, inst, TRUE, FALSE);
02422       break;
02423 
02424    case TGSI_OPCODE_TXD:
02425       /* Texture lookup with explicit partial derivatives */
02426       /* src[0] = texcoord */
02427       /* src[1] = d[strq]/dx */
02428       /* src[2] = d[strq]/dy */
02429       /* src[3] = sampler unit */
02430       assert (0);
02431       break;
02432 
02433    case TGSI_OPCODE_TXL:
02434       /* Texture lookup with explicit LOD */
02435       /* src[0] = texcoord (src[0].w = LOD) */
02436       /* src[1] = sampler unit */
02437       exec_tex(mach, inst, TRUE, FALSE);
02438       break;
02439 
02440    case TGSI_OPCODE_TXP:
02441       /* Texture lookup with projection */
02442       /* src[0] = texcoord (src[0].w = projection) */
02443       /* src[1] = sampler unit */
02444       exec_tex(mach, inst, FALSE, TRUE);
02445       break;
02446 
02447    case TGSI_OPCODE_UP2H:
02448       assert (0);
02449       break;
02450 
02451    case TGSI_OPCODE_UP2US:
02452       assert (0);
02453       break;
02454 
02455    case TGSI_OPCODE_UP4B:
02456       assert (0);
02457       break;
02458 
02459    case TGSI_OPCODE_UP4UB:
02460       assert (0);
02461       break;
02462 
02463    case TGSI_OPCODE_X2D:
02464       FETCH(&r[0], 1, CHAN_X);
02465       FETCH(&r[1], 1, CHAN_Y);
02466       if (IS_CHANNEL_ENABLED(*inst, CHAN_X) ||
02467           IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
02468          FETCH(&r[2], 2, CHAN_X);
02469          micro_mul(&r[2], &r[2], &r[0]);
02470          FETCH(&r[3], 2, CHAN_Y);
02471          micro_mul(&r[3], &r[3], &r[1]);
02472          micro_add(&r[2], &r[2], &r[3]);
02473          FETCH(&r[3], 0, CHAN_X);
02474          micro_add(&r[2], &r[2], &r[3]);
02475          if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) {
02476             STORE(&r[2], 0, CHAN_X);
02477          }
02478          if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
02479             STORE(&r[2], 0, CHAN_Z);
02480          }
02481       }
02482       if (IS_CHANNEL_ENABLED(*inst, CHAN_Y) ||
02483           IS_CHANNEL_ENABLED(*inst, CHAN_W)) {
02484          FETCH(&r[2], 2, CHAN_Z);
02485          micro_mul(&r[2], &r[2], &r[0]);
02486          FETCH(&r[3], 2, CHAN_W);
02487          micro_mul(&r[3], &r[3], &r[1]);
02488          micro_add(&r[2], &r[2], &r[3]);
02489          FETCH(&r[3], 0, CHAN_Y);
02490          micro_add(&r[2], &r[2], &r[3]);
02491          if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) {
02492             STORE(&r[2], 0, CHAN_Y);
02493          }
02494          if (IS_CHANNEL_ENABLED(*inst, CHAN_W)) {
02495             STORE(&r[2], 0, CHAN_W);
02496          }
02497       }
02498       break;
02499 
02500    case TGSI_OPCODE_ARA:
02501       assert (0);
02502       break;
02503 
02504    case TGSI_OPCODE_BRA:
02505       assert (0);
02506       break;
02507 
02508    case TGSI_OPCODE_CAL:
02509       /* skip the call if no execution channels are enabled */
02510       if (mach->ExecMask) {
02511          /* do the call */
02512 
02513          /* First, record the depths of the execution stacks.
02514           * This is important for deeply nested/looped return statements.
02515           * We have to unwind the stacks by the correct amount.  For a
02516           * real code generator, we could determine the number of entries
02517           * to pop off each stack with simple static analysis and avoid
02518           * implementing this data structure at run time.
02519           */
02520          mach->CallStack[mach->CallStackTop].CondStackTop = mach->CondStackTop;
02521          mach->CallStack[mach->CallStackTop].LoopStackTop = mach->LoopStackTop;
02522          mach->CallStack[mach->CallStackTop].ContStackTop = mach->ContStackTop;
02523          /* note that PC was already incremented above */
02524          mach->CallStack[mach->CallStackTop].ReturnAddr = *pc;
02525 
02526          mach->CallStackTop++;
02527 
02528          /* Second, push the Cond, Loop, Cont, Func stacks */
02529          assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
02530          mach->CondStack[mach->CondStackTop++] = mach->CondMask;
02531          assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
02532          mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
02533          assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
02534          mach->ContStack[mach->ContStackTop++] = mach->ContMask;
02535          assert(mach->FuncStackTop < TGSI_EXEC_MAX_CALL_NESTING);
02536          mach->FuncStack[mach->FuncStackTop++] = mach->FuncMask;
02537 
02538          /* Finally, jump to the subroutine */
02539          *pc = inst->InstructionExtLabel.Label;
02540       }
02541       break;
02542 
02543    case TGSI_OPCODE_RET:
02544       mach->FuncMask &= ~mach->ExecMask;
02545       UPDATE_EXEC_MASK(mach);
02546 
02547       if (mach->FuncMask == 0x0) {
02548          /* really return now (otherwise, keep executing) */
02549 
02550          if (mach->CallStackTop == 0) {
02551             /* returning from main() */
02552             *pc = -1;
02553             return;
02554          }
02555 
02556          assert(mach->CallStackTop > 0);
02557          mach->CallStackTop--;
02558 
02559          mach->CondStackTop = mach->CallStack[mach->CallStackTop].CondStackTop;
02560          mach->CondMask = mach->CondStack[mach->CondStackTop];
02561 
02562          mach->LoopStackTop = mach->CallStack[mach->CallStackTop].LoopStackTop;
02563          mach->LoopMask = mach->LoopStack[mach->LoopStackTop];
02564 
02565          mach->ContStackTop = mach->CallStack[mach->CallStackTop].ContStackTop;
02566          mach->ContMask = mach->ContStack[mach->ContStackTop];
02567 
02568          assert(mach->FuncStackTop > 0);
02569          mach->FuncMask = mach->FuncStack[--mach->FuncStackTop];
02570 
02571          *pc = mach->CallStack[mach->CallStackTop].ReturnAddr;
02572 
02573          UPDATE_EXEC_MASK(mach);
02574       }
02575       break;
02576 
02577    case TGSI_OPCODE_SSG:
02578    /* TGSI_OPCODE_SGN */
02579       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
02580          FETCH( &r[0], 0, chan_index );
02581          micro_sgn( &r[0], &r[0] );
02582          STORE( &r[0], 0, chan_index );
02583       }
02584       break;
02585 
02586    case TGSI_OPCODE_CMP:
02587       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
02588          FETCH(&r[0], 0, chan_index);
02589          FETCH(&r[1], 1, chan_index);
02590          FETCH(&r[2], 2, chan_index);
02591 
02592          micro_lt( &r[0], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[1], &r[2] );
02593 
02594          STORE(&r[0], 0, chan_index);
02595       }
02596       break;
02597 
02598    case TGSI_OPCODE_SCS:
02599       if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) || IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) {
02600          FETCH( &r[0], 0, CHAN_X );
02601          if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) {
02602             micro_cos(&r[1], &r[0]);
02603             STORE(&r[1], 0, CHAN_X);
02604          }
02605          if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) {
02606             micro_sin(&r[1], &r[0]);
02607             STORE(&r[1], 0, CHAN_Y);
02608          }
02609       }
02610       if( IS_CHANNEL_ENABLED( *inst, CHAN_Z ) ) {
02611          STORE( &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], 0, CHAN_Z );
02612       }
02613       if( IS_CHANNEL_ENABLED( *inst, CHAN_W ) ) {
02614          STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
02615       }
02616       break;
02617 
02618    case TGSI_OPCODE_NRM:
02619       /* 3-component vector normalize */
02620       if(IS_CHANNEL_ENABLED(*inst, CHAN_X) ||
02621          IS_CHANNEL_ENABLED(*inst, CHAN_Y) ||
02622          IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
02623          /* r3 = sqrt(dp3(src0, src0)) */
02624          FETCH(&r[0], 0, CHAN_X);
02625          micro_mul(&r[3], &r[0], &r[0]);
02626          FETCH(&r[1], 0, CHAN_Y);
02627          micro_mul(&r[4], &r[1], &r[1]);
02628          micro_add(&r[3], &r[3], &r[4]);
02629          FETCH(&r[2], 0, CHAN_Z);
02630          micro_mul(&r[4], &r[2], &r[2]);
02631          micro_add(&r[3], &r[3], &r[4]);
02632          micro_sqrt(&r[3], &r[3]);
02633 
02634          if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) {
02635             micro_div(&r[0], &r[0], &r[3]);
02636             STORE(&r[0], 0, CHAN_X);
02637          }
02638          if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) {
02639             micro_div(&r[1], &r[1], &r[3]);
02640             STORE(&r[1], 0, CHAN_Y);
02641          }
02642          if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
02643             micro_div(&r[2], &r[2], &r[3]);
02644             STORE(&r[2], 0, CHAN_Z);
02645          }
02646       }
02647       if (IS_CHANNEL_ENABLED(*inst, CHAN_W)) {
02648          STORE(&mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W);
02649       }
02650       break;
02651 
02652    case TGSI_OPCODE_NRM4:
02653       /* 4-component vector normalize */
02654       {
02655          union tgsi_exec_channel tmp, dot;
02656 
02657          /* tmp = dp4(src0, src0): */
02658          FETCH( &r[0], 0, CHAN_X );
02659          micro_mul( &tmp, &r[0], &r[0] );
02660 
02661          FETCH( &r[1], 0, CHAN_Y );
02662          micro_mul( &dot, &r[1], &r[1] );
02663          micro_add( &tmp, &tmp, &dot );
02664 
02665          FETCH( &r[2], 0, CHAN_Z );
02666          micro_mul( &dot, &r[2], &r[2] );
02667          micro_add( &tmp, &tmp, &dot );
02668 
02669          FETCH( &r[3], 0, CHAN_W );
02670          micro_mul( &dot, &r[3], &r[3] );
02671          micro_add( &tmp, &tmp, &dot );
02672 
02673          /* tmp = 1 / sqrt(tmp) */
02674          micro_sqrt( &tmp, &tmp );
02675          micro_div( &tmp, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &tmp );
02676 
02677          FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
02678             /* chan = chan * tmp */
02679             micro_mul( &r[chan_index], &tmp, &r[chan_index] );
02680             STORE( &r[chan_index], 0, chan_index );
02681          }
02682       }
02683       break;
02684 
02685    case TGSI_OPCODE_DIV:
02686       assert( 0 );
02687       break;
02688 
02689    case TGSI_OPCODE_DP2:
02690       FETCH( &r[0], 0, CHAN_X );
02691       FETCH( &r[1], 1, CHAN_X );
02692       micro_mul( &r[0], &r[0], &r[1] );
02693 
02694       FETCH( &r[1], 0, CHAN_Y );
02695       FETCH( &r[2], 1, CHAN_Y );
02696       micro_mul( &r[1], &r[1], &r[2] );
02697       micro_add( &r[0], &r[0], &r[1] );
02698 
02699       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
02700          STORE( &r[0], 0, chan_index );
02701       }
02702       break;
02703 
02704    case TGSI_OPCODE_IF:
02705       /* push CondMask */
02706       assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
02707       mach->CondStack[mach->CondStackTop++] = mach->CondMask;
02708       FETCH( &r[0], 0, CHAN_X );
02709       /* update CondMask */
02710       if( ! r[0].f[0] ) {
02711          mach->CondMask &= ~0x1;
02712       }
02713       if( ! r[0].f[1] ) {
02714          mach->CondMask &= ~0x2;
02715       }
02716       if( ! r[0].f[2] ) {
02717          mach->CondMask &= ~0x4;
02718       }
02719       if( ! r[0].f[3] ) {
02720          mach->CondMask &= ~0x8;
02721       }
02722       UPDATE_EXEC_MASK(mach);
02723       /* Todo: If CondMask==0, jump to ELSE */
02724       break;
02725 
02726    case TGSI_OPCODE_ELSE:
02727       /* invert CondMask wrt previous mask */
02728       {
02729          uint prevMask;
02730          assert(mach->CondStackTop > 0);
02731          prevMask = mach->CondStack[mach->CondStackTop - 1];
02732          mach->CondMask = ~mach->CondMask & prevMask;
02733          UPDATE_EXEC_MASK(mach);
02734          /* Todo: If CondMask==0, jump to ENDIF */
02735       }
02736       break;
02737 
02738    case TGSI_OPCODE_ENDIF:
02739       /* pop CondMask */
02740       assert(mach->CondStackTop > 0);
02741       mach->CondMask = mach->CondStack[--mach->CondStackTop];
02742       UPDATE_EXEC_MASK(mach);
02743       break;
02744 
02745    case TGSI_OPCODE_END:
02746       /* halt execution */
02747       *pc = -1;
02748       break;
02749 
02750    case TGSI_OPCODE_REP:
02751       assert (0);
02752       break;
02753 
02754    case TGSI_OPCODE_ENDREP:
02755        assert (0);
02756        break;
02757 
02758    case TGSI_OPCODE_PUSHA:
02759       assert (0);
02760       break;
02761 
02762    case TGSI_OPCODE_POPA:
02763       assert (0);
02764       break;
02765 
02766    case TGSI_OPCODE_CEIL:
02767       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
02768          FETCH( &r[0], 0, chan_index );
02769          micro_ceil( &r[0], &r[0] );
02770          STORE( &r[0], 0, chan_index );
02771       }
02772       break;
02773 
02774    case TGSI_OPCODE_I2F:
02775       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
02776          FETCH( &r[0], 0, chan_index );
02777          micro_i2f( &r[0], &r[0] );
02778          STORE( &r[0], 0, chan_index );
02779       }
02780       break;
02781 
02782    case TGSI_OPCODE_NOT:
02783       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
02784          FETCH( &r[0], 0, chan_index );
02785          micro_not( &r[0], &r[0] );
02786          STORE( &r[0], 0, chan_index );
02787       }
02788       break;
02789 
02790    case TGSI_OPCODE_TRUNC:
02791       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
02792          FETCH( &r[0], 0, chan_index );
02793          micro_trunc( &r[0], &r[0] );
02794          STORE( &r[0], 0, chan_index );
02795       }
02796       break;
02797 
02798    case TGSI_OPCODE_SHL:
02799       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
02800          FETCH( &r[0], 0, chan_index );
02801          FETCH( &r[1], 1, chan_index );
02802          micro_shl( &r[0], &r[0], &r[1] );
02803          STORE( &r[0], 0, chan_index );
02804       }
02805       break;
02806 
02807    case TGSI_OPCODE_SHR:
02808       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
02809          FETCH( &r[0], 0, chan_index );
02810          FETCH( &r[1], 1, chan_index );
02811          micro_ishr( &r[0], &r[0], &r[1] );
02812          STORE( &r[0], 0, chan_index );
02813       }
02814       break;
02815 
02816    case TGSI_OPCODE_AND:
02817       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
02818          FETCH( &r[0], 0, chan_index );
02819          FETCH( &r[1], 1, chan_index );
02820          micro_and( &r[0], &r[0], &r[1] );
02821          STORE( &r[0], 0, chan_index );
02822       }
02823       break;
02824 
02825    case TGSI_OPCODE_OR:
02826       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
02827          FETCH( &r[0], 0, chan_index );
02828          FETCH( &r[1], 1, chan_index );
02829          micro_or( &r[0], &r[0], &r[1] );
02830          STORE( &r[0], 0, chan_index );
02831       }
02832       break;
02833 
02834    case TGSI_OPCODE_MOD:
02835       assert (0);
02836       break;
02837 
02838    case TGSI_OPCODE_XOR:
02839       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
02840          FETCH( &r[0], 0, chan_index );
02841          FETCH( &r[1], 1, chan_index );
02842          micro_xor( &r[0], &r[0], &r[1] );
02843          STORE( &r[0], 0, chan_index );
02844       }
02845       break;
02846 
02847    case TGSI_OPCODE_SAD:
02848       assert (0);
02849       break;
02850 
02851    case TGSI_OPCODE_TXF:
02852       assert (0);
02853       break;
02854 
02855    case TGSI_OPCODE_TXQ:
02856       assert (0);
02857       break;
02858 
02859    case TGSI_OPCODE_EMIT:
02860       mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] += 16;
02861       mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]]++;
02862       break;
02863 
02864    case TGSI_OPCODE_ENDPRIM:
02865       mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]++;
02866       mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]] = 0;
02867       break;
02868 
02869    case TGSI_OPCODE_LOOP:
02870       assert(mach->LoopCounterStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
02871       for (chan_index = 0; chan_index < 3; chan_index++) {
02872          FETCH( &mach->LoopCounterStack[mach->LoopCounterStackTop].xyzw[chan_index], 0, chan_index );
02873       }
02874       STORE( &mach->LoopCounterStack[mach->LoopCounterStackTop].xyzw[CHAN_Y], 0, CHAN_X );
02875       ++mach->LoopCounterStackTop;
02876       /* fall-through (for now) */
02877    case TGSI_OPCODE_BGNLOOP2:
02878       /* push LoopMask and ContMasks */
02879       assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
02880       mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
02881       assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
02882       mach->ContStack[mach->ContStackTop++] = mach->ContMask;
02883       assert(mach->LoopLabelStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
02884       mach->LoopLabelStack[mach->LoopLabelStackTop++] = *pc - 1;
02885       break;
02886 
02887    case TGSI_OPCODE_ENDLOOP:
02888       assert(mach->LoopCounterStackTop > 0);
02889       micro_sub( &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X], 
02890                  &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X],
02891                  &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] );
02892       /* update LoopMask */
02893       if( mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X].f[0] <= 0) {
02894          mach->LoopMask &= ~0x1;
02895       }
02896       if( mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X].f[1] <= 0 ) {
02897          mach->LoopMask &= ~0x2;
02898       }
02899       if( mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X].f[2] <= 0 ) {
02900          mach->LoopMask &= ~0x4;
02901       }
02902       if( mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X].f[3] <= 0 ) {
02903          mach->LoopMask &= ~0x8;
02904       }
02905       micro_add( &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y], 
02906                  &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y], 
02907                  &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Z]);
02908       assert(mach->LoopLabelStackTop > 0);
02909       inst = mach->Instructions + mach->LoopLabelStack[mach->LoopLabelStackTop - 1];
02910       STORE( &mach->LoopCounterStack[mach->LoopCounterStackTop].xyzw[CHAN_Y], 0, CHAN_X );
02911       /* Restore ContMask, but don't pop */
02912       assert(mach->ContStackTop > 0);
02913       mach->ContMask = mach->ContStack[mach->ContStackTop - 1];
02914       UPDATE_EXEC_MASK(mach);
02915       if (mach->ExecMask) {
02916          /* repeat loop: jump to instruction just past BGNLOOP */
02917          assert(mach->LoopLabelStackTop > 0);
02918          *pc = mach->LoopLabelStack[mach->LoopLabelStackTop - 1] + 1;
02919       }
02920       else {
02921          /* exit loop: pop LoopMask */
02922          assert(mach->LoopStackTop > 0);
02923          mach->LoopMask = mach->LoopStack[--mach->LoopStackTop];
02924          /* pop ContMask */
02925          assert(mach->ContStackTop > 0);
02926          mach->ContMask = mach->ContStack[--mach->ContStackTop];
02927          assert(mach->LoopLabelStackTop > 0);
02928          --mach->LoopLabelStackTop;
02929          assert(mach->LoopCounterStackTop > 0);
02930          --mach->LoopCounterStackTop;
02931       }
02932       UPDATE_EXEC_MASK(mach);
02933       break;
02934       
02935    case TGSI_OPCODE_ENDLOOP2:
02936       /* Restore ContMask, but don't pop */
02937       assert(mach->ContStackTop > 0);
02938       mach->ContMask = mach->ContStack[mach->ContStackTop - 1];
02939       UPDATE_EXEC_MASK(mach);
02940       if (mach->ExecMask) {
02941          /* repeat loop: jump to instruction just past BGNLOOP */
02942          assert(mach->LoopLabelStackTop > 0);
02943          *pc = mach->LoopLabelStack[mach->LoopLabelStackTop - 1] + 1;
02944       }
02945       else {
02946          /* exit loop: pop LoopMask */
02947          assert(mach->LoopStackTop > 0);
02948          mach->LoopMask = mach->LoopStack[--mach->LoopStackTop];
02949          /* pop ContMask */
02950          assert(mach->ContStackTop > 0);
02951          mach->ContMask = mach->ContStack[--mach->ContStackTop];
02952          assert(mach->LoopLabelStackTop > 0);
02953          --mach->LoopLabelStackTop;
02954       }
02955       UPDATE_EXEC_MASK(mach);
02956       break;
02957 
02958    case TGSI_OPCODE_BRK:
02959       /* turn off loop channels for each enabled exec channel */
02960       mach->LoopMask &= ~mach->ExecMask;
02961       /* Todo: if mach->LoopMask == 0, jump to end of loop */
02962       UPDATE_EXEC_MASK(mach);
02963       break;
02964 
02965    case TGSI_OPCODE_CONT:
02966       /* turn off cont channels for each enabled exec channel */
02967       mach->ContMask &= ~mach->ExecMask;
02968       /* Todo: if mach->LoopMask == 0, jump to end of loop */
02969       UPDATE_EXEC_MASK(mach);
02970       break;
02971 
02972    case TGSI_OPCODE_BGNSUB:
02973       /* no-op */
02974       break;
02975 
02976    case TGSI_OPCODE_ENDSUB:
02977       /* no-op */
02978       break;
02979 
02980    case TGSI_OPCODE_NOISE1:
02981       assert( 0 );
02982       break;
02983 
02984    case TGSI_OPCODE_NOISE2:
02985       assert( 0 );
02986       break;
02987 
02988    case TGSI_OPCODE_NOISE3:
02989       assert( 0 );
02990       break;
02991 
02992    case TGSI_OPCODE_NOISE4:
02993       assert( 0 );
02994       break;
02995 
02996    case TGSI_OPCODE_NOP:
02997       break;
02998 
02999    default:
03000       assert( 0 );
03001    }
03002 }
03003 
03004 
03009 uint
03010 tgsi_exec_machine_run( struct tgsi_exec_machine *mach )
03011 {
03012    uint i;
03013    int pc = 0;
03014 
03015    mach->CondMask = 0xf;
03016    mach->LoopMask = 0xf;
03017    mach->ContMask = 0xf;
03018    mach->FuncMask = 0xf;
03019    mach->ExecMask = 0xf;
03020 
03021    mach->CondStackTop = 0;
03022    mach->LoopStackTop = 0;
03023    mach->ContStackTop = 0;
03024    mach->CallStackTop = 0;
03025 
03026    mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] = 0;
03027    mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] = 0;
03028 
03029    if( mach->Processor == TGSI_PROCESSOR_GEOMETRY ) {
03030       mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0] = 0;
03031       mach->Primitives[0] = 0;
03032    }
03033 
03034    for (i = 0; i < QUAD_SIZE; i++) {
03035       mach->Temps[TEMP_CC_I].xyzw[TEMP_CC_C].u[i] =
03036          (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_X_SHIFT) |
03037          (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_Y_SHIFT) |
03038          (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_Z_SHIFT) |
03039          (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_W_SHIFT);
03040    }
03041 
03042    /* execute declarations (interpolants) */
03043    for (i = 0; i < mach->NumDeclarations; i++) {
03044       exec_declaration( mach, mach->Declarations+i );
03045    }
03046 
03047    /* execute instructions, until pc is set to -1 */
03048    while (pc != -1) {
03049       assert(pc < (int) mach->NumInstructions);
03050       exec_instruction( mach, mach->Instructions + pc, &pc );
03051    }
03052 
03053    assert(mach->CondStackTop == 0);
03054    assert(mach->LoopStackTop == 0);
03055    assert(mach->ContStackTop == 0);
03056    assert(mach->CallStackTop == 0);
03057    
03058 #if 0
03059    /* we scale from floats in [0,1] to Zbuffer ints in sp_quad_depth_test.c */
03060    if (mach->Processor == TGSI_PROCESSOR_FRAGMENT) {
03061       /*
03062        * Scale back depth component.
03063        */
03064       for (i = 0; i < 4; i++)
03065          mach->Outputs[0].xyzw[2].f[i] *= ctx->DrawBuffer->_DepthMaxF;
03066    }
03067 #endif
03068 
03069    return ~mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0];
03070 }
03071 
03072 

Generated on Tue Sep 29 06:25:15 2009 for Gallium3D by  doxygen 1.5.4