00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00053 #include "pipe/p_compiler.h"
00054 #include "pipe/p_state.h"
00055 #include "pipe/p_shader_tokens.h"
00056 #include "tgsi/tgsi_parse.h"
00057 #include "tgsi/tgsi_util.h"
00058 #include "tgsi_exec.h"
00059 #include "util/u_memory.h"
00060 #include "util/u_math.h"
00061
/*
 * Compile-time switch tested with #if in micro_exp2(), micro_lg2() and
 * micro_pow(): when non-zero those helpers call util_fast_exp2(),
 * util_fast_log2() and util_fast_pow(); when zero they call powf()/logf().
 */
00062 #define FAST_MATH 1
00063
/*
 * Lane indices into the 4-wide value arrays of a tgsi_exec_channel.
 * micro_ddx() and micro_ddy() use them to choose which lanes to subtract.
 * NOTE(review): the names suggest the four lanes are the pixels of a 2x2
 * tile - confirm against the code that fills the channels.
 */
00064 #define TILE_TOP_LEFT 0
00065 #define TILE_TOP_RIGHT 1
00066 #define TILE_BOTTOM_LEFT 2
00067 #define TILE_BOTTOM_RIGHT 3
00068
/*
 * Register component indices (X, Y, Z, W).  fetch_source() passes CHAN_X to
 * tgsi_util_get_src_register_swizzle() when it resolves an indirect index.
 * NOTE(review): presumably these are also the CHAN values fed to the
 * IS_CHANNEL_ENABLED() write-mask tests - confirm at the call sites.
 */
00069 #define CHAN_X 0
00070 #define CHAN_Y 1
00071 #define CHAN_Z 2
00072 #define CHAN_W 3
00073
00074
00075
00076
/*
 * Short aliases for the TGSI_EXEC_TEMP_* locations.  In this file a *_I
 * value is used as an index into mach->Temps[] and the matching *_C value
 * as the index into that vector's xyzw[] array.
 *
 * tgsi_exec_machine_init() fills every lane of the following slots:
 *   TEMP_0    0x00000000      TEMP_7F   0x7FFFFFFF
 *   TEMP_80   0x80000000      TEMP_FF   0xFFFFFFFF
 *   TEMP_1    1.0f            TEMP_2    2.0f
 *   TEMP_128  128.0f          TEMP_M128 -128.0f
 *   TEMP_3    3.0f            TEMP_HALF 0.5f
 *
 * The remaining aliases (KILMASK, OUTPUT, PRIMITIVE, CC, R0) are not
 * initialised there.
 */
00077 #define TEMP_0_I TGSI_EXEC_TEMP_00000000_I
00078 #define TEMP_0_C TGSI_EXEC_TEMP_00000000_C
00079 #define TEMP_7F_I TGSI_EXEC_TEMP_7FFFFFFF_I
00080 #define TEMP_7F_C TGSI_EXEC_TEMP_7FFFFFFF_C
00081 #define TEMP_80_I TGSI_EXEC_TEMP_80000000_I
00082 #define TEMP_80_C TGSI_EXEC_TEMP_80000000_C
00083 #define TEMP_FF_I TGSI_EXEC_TEMP_FFFFFFFF_I
00084 #define TEMP_FF_C TGSI_EXEC_TEMP_FFFFFFFF_C
00085 #define TEMP_1_I TGSI_EXEC_TEMP_ONE_I
00086 #define TEMP_1_C TGSI_EXEC_TEMP_ONE_C
00087 #define TEMP_2_I TGSI_EXEC_TEMP_TWO_I
00088 #define TEMP_2_C TGSI_EXEC_TEMP_TWO_C
00089 #define TEMP_128_I TGSI_EXEC_TEMP_128_I
00090 #define TEMP_128_C TGSI_EXEC_TEMP_128_C
00091 #define TEMP_M128_I TGSI_EXEC_TEMP_MINUS_128_I
00092 #define TEMP_M128_C TGSI_EXEC_TEMP_MINUS_128_C
00093 #define TEMP_KILMASK_I TGSI_EXEC_TEMP_KILMASK_I
00094 #define TEMP_KILMASK_C TGSI_EXEC_TEMP_KILMASK_C
00095 #define TEMP_OUTPUT_I TGSI_EXEC_TEMP_OUTPUT_I
00096 #define TEMP_OUTPUT_C TGSI_EXEC_TEMP_OUTPUT_C
00097 #define TEMP_PRIMITIVE_I TGSI_EXEC_TEMP_PRIMITIVE_I
00098 #define TEMP_PRIMITIVE_C TGSI_EXEC_TEMP_PRIMITIVE_C
00099 #define TEMP_CC_I TGSI_EXEC_TEMP_CC_I
00100 #define TEMP_CC_C TGSI_EXEC_TEMP_CC_C
00101 #define TEMP_3_I TGSI_EXEC_TEMP_THREE_I
00102 #define TEMP_3_C TGSI_EXEC_TEMP_THREE_C
00103 #define TEMP_HALF_I TGSI_EXEC_TEMP_HALF_I
00104 #define TEMP_HALF_C TGSI_EXEC_TEMP_HALF_C
00105 #define TEMP_R0 TGSI_EXEC_TEMP_R0
00106
/**
 * Non-zero when bit CHAN is set in the write mask of INST's first
 * destination register (FullDstRegisters[0]).
 */
#define IS_CHANNEL_ENABLED(INST, CHAN)\
   ((INST).FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN)))

/**
 * Same test for INST's second destination register (FullDstRegisters[1]).
 */
#define IS_CHANNEL_ENABLED2(INST, CHAN)\
   ((INST).FullDstRegisters[1].DstRegister.WriteMask & (1 << (CHAN)))

/**
 * Loop header: runs the statement that follows once for every CHAN in
 * [0, NUM_CHANNELS) that is enabled in the first destination's write mask.
 *
 * The "if (!...) { } else" tail gives the hidden conditional its own else,
 * so an "else" written by the caller after the loop body can no longer
 * attach to the macro's internal "if".
 */
#define FOR_EACH_ENABLED_CHANNEL(INST, CHAN)\
   for ((CHAN) = 0; (CHAN) < NUM_CHANNELS; (CHAN)++)\
      if (!IS_CHANNEL_ENABLED( INST, CHAN )) { } else

/**
 * As FOR_EACH_ENABLED_CHANNEL, but driven by the second destination's
 * write mask.
 */
#define FOR_EACH_ENABLED_CHANNEL2(INST, CHAN)\
   for ((CHAN) = 0; (CHAN) < NUM_CHANNELS; (CHAN)++)\
      if (!IS_CHANNEL_ENABLED2( INST, CHAN )) { } else
00120
00121
/**
 * Recompute MACH->ExecMask as the AND of the machine's CondMask, LoopMask,
 * ContMask and FuncMask.
 *
 * MACH and the whole expansion are parenthesised so that any pointer
 * expression (for example "&machine") can be passed safely.
 */
#define UPDATE_EXEC_MASK(MACH) \
   ((MACH)->ExecMask = (MACH)->CondMask & (MACH)->LoopMask & (MACH)->ContMask & (MACH)->FuncMask)
00125
00131 void
00132 tgsi_exec_machine_bind_shader(
00133 struct tgsi_exec_machine *mach,
00134 const struct tgsi_token *tokens,
00135 uint numSamplers,
00136 struct tgsi_sampler *samplers)
00137 {
00138 uint k;
00139 struct tgsi_parse_context parse;
00140 struct tgsi_exec_labels *labels = &mach->Labels;
00141 struct tgsi_full_instruction *instructions;
00142 struct tgsi_full_declaration *declarations;
00143 uint maxInstructions = 10, numInstructions = 0;
00144 uint maxDeclarations = 10, numDeclarations = 0;
00145 uint instno = 0;
00146
00147 #if 0
00148 tgsi_dump(tokens, 0);
00149 #endif
00150
00151 util_init_math();
00152
00153 mach->Tokens = tokens;
00154 mach->Samplers = samplers;
00155
00156 k = tgsi_parse_init (&parse, mach->Tokens);
00157 if (k != TGSI_PARSE_OK) {
00158 debug_printf( "Problem parsing!\n" );
00159 return;
00160 }
00161
00162 mach->Processor = parse.FullHeader.Processor.Processor;
00163 mach->ImmLimit = 0;
00164 labels->count = 0;
00165
00166 declarations = (struct tgsi_full_declaration *)
00167 MALLOC( maxDeclarations * sizeof(struct tgsi_full_declaration) );
00168
00169 if (!declarations) {
00170 return;
00171 }
00172
00173 instructions = (struct tgsi_full_instruction *)
00174 MALLOC( maxInstructions * sizeof(struct tgsi_full_instruction) );
00175
00176 if (!instructions) {
00177 FREE( declarations );
00178 return;
00179 }
00180
00181 while( !tgsi_parse_end_of_tokens( &parse ) ) {
00182 uint pointer = parse.Position;
00183 uint i;
00184
00185 tgsi_parse_token( &parse );
00186 switch( parse.FullToken.Token.Type ) {
00187 case TGSI_TOKEN_TYPE_DECLARATION:
00188
00189 if (numDeclarations == maxDeclarations) {
00190 declarations = REALLOC(declarations,
00191 maxDeclarations
00192 * sizeof(struct tgsi_full_declaration),
00193 (maxDeclarations + 10)
00194 * sizeof(struct tgsi_full_declaration));
00195 maxDeclarations += 10;
00196 }
00197 memcpy(declarations + numDeclarations,
00198 &parse.FullToken.FullDeclaration,
00199 sizeof(declarations[0]));
00200 numDeclarations++;
00201 break;
00202
00203 case TGSI_TOKEN_TYPE_IMMEDIATE:
00204 {
00205 uint size = parse.FullToken.FullImmediate.Immediate.Size - 1;
00206 assert( size % 4 == 0 );
00207 assert( mach->ImmLimit + size / 4 <= TGSI_EXEC_NUM_IMMEDIATES );
00208
00209 for( i = 0; i < size; i++ ) {
00210 mach->Imms[mach->ImmLimit + i / 4][i % 4] =
00211 parse.FullToken.FullImmediate.u.ImmediateFloat32[i].Float;
00212 }
00213 mach->ImmLimit += size / 4;
00214 }
00215 break;
00216
00217 case TGSI_TOKEN_TYPE_INSTRUCTION:
00218 assert( labels->count < MAX_LABELS );
00219
00220 labels->labels[labels->count][0] = instno;
00221 labels->labels[labels->count][1] = pointer;
00222 labels->count++;
00223
00224
00225 if (numInstructions == maxInstructions) {
00226 instructions = REALLOC(instructions,
00227 maxInstructions
00228 * sizeof(struct tgsi_full_instruction),
00229 (maxInstructions + 10)
00230 * sizeof(struct tgsi_full_instruction));
00231 maxInstructions += 10;
00232 }
00233 memcpy(instructions + numInstructions,
00234 &parse.FullToken.FullInstruction,
00235 sizeof(instructions[0]));
00236 numInstructions++;
00237 break;
00238
00239 default:
00240 assert( 0 );
00241 }
00242 }
00243 tgsi_parse_free (&parse);
00244
00245 if (mach->Declarations) {
00246 FREE( mach->Declarations );
00247 }
00248 mach->Declarations = declarations;
00249 mach->NumDeclarations = numDeclarations;
00250
00251 if (mach->Instructions) {
00252 FREE( mach->Instructions );
00253 }
00254 mach->Instructions = instructions;
00255 mach->NumInstructions = numInstructions;
00256 }
00257
00258
00259 void
00260 tgsi_exec_machine_init(
00261 struct tgsi_exec_machine *mach )
00262 {
00263 uint i;
00264
00265 mach->Temps = (struct tgsi_exec_vector *) tgsi_align_128bit( mach->_Temps);
00266 mach->Addrs = &mach->Temps[TGSI_EXEC_TEMP_ADDR];
00267
00268
00269 for( i = 0; i < 4; i++ ) {
00270 mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].u[i] = 0x00000000;
00271 mach->Temps[TEMP_7F_I].xyzw[TEMP_7F_C].u[i] = 0x7FFFFFFF;
00272 mach->Temps[TEMP_80_I].xyzw[TEMP_80_C].u[i] = 0x80000000;
00273 mach->Temps[TEMP_FF_I].xyzw[TEMP_FF_C].u[i] = 0xFFFFFFFF;
00274 mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].f[i] = 1.0f;
00275 mach->Temps[TEMP_2_I].xyzw[TEMP_2_C].f[i] = 2.0f;
00276 mach->Temps[TEMP_128_I].xyzw[TEMP_128_C].f[i] = 128.0f;
00277 mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C].f[i] = -128.0f;
00278 mach->Temps[TEMP_3_I].xyzw[TEMP_3_C].f[i] = 3.0f;
00279 mach->Temps[TEMP_HALF_I].xyzw[TEMP_HALF_C].f[i] = 0.5f;
00280 }
00281 }
00282
00283
00284 void
00285 tgsi_exec_machine_free_data(struct tgsi_exec_machine *mach)
00286 {
00287 if (mach->Instructions) {
00288 FREE(mach->Instructions);
00289 mach->Instructions = NULL;
00290 mach->NumInstructions = 0;
00291 }
00292 if (mach->Declarations) {
00293 FREE(mach->Declarations);
00294 mach->Declarations = NULL;
00295 mach->NumDeclarations = 0;
00296 }
00297 }
00298
00299
00300 static void
00301 micro_abs(
00302 union tgsi_exec_channel *dst,
00303 const union tgsi_exec_channel *src )
00304 {
00305 dst->f[0] = fabsf( src->f[0] );
00306 dst->f[1] = fabsf( src->f[1] );
00307 dst->f[2] = fabsf( src->f[2] );
00308 dst->f[3] = fabsf( src->f[3] );
00309 }
00310
00311 static void
00312 micro_add(
00313 union tgsi_exec_channel *dst,
00314 const union tgsi_exec_channel *src0,
00315 const union tgsi_exec_channel *src1 )
00316 {
00317 dst->f[0] = src0->f[0] + src1->f[0];
00318 dst->f[1] = src0->f[1] + src1->f[1];
00319 dst->f[2] = src0->f[2] + src1->f[2];
00320 dst->f[3] = src0->f[3] + src1->f[3];
00321 }
00322
00323 static void
00324 micro_iadd(
00325 union tgsi_exec_channel *dst,
00326 const union tgsi_exec_channel *src0,
00327 const union tgsi_exec_channel *src1 )
00328 {
00329 dst->i[0] = src0->i[0] + src1->i[0];
00330 dst->i[1] = src0->i[1] + src1->i[1];
00331 dst->i[2] = src0->i[2] + src1->i[2];
00332 dst->i[3] = src0->i[3] + src1->i[3];
00333 }
00334
00335 static void
00336 micro_and(
00337 union tgsi_exec_channel *dst,
00338 const union tgsi_exec_channel *src0,
00339 const union tgsi_exec_channel *src1 )
00340 {
00341 dst->u[0] = src0->u[0] & src1->u[0];
00342 dst->u[1] = src0->u[1] & src1->u[1];
00343 dst->u[2] = src0->u[2] & src1->u[2];
00344 dst->u[3] = src0->u[3] & src1->u[3];
00345 }
00346
00347 static void
00348 micro_ceil(
00349 union tgsi_exec_channel *dst,
00350 const union tgsi_exec_channel *src )
00351 {
00352 dst->f[0] = ceilf( src->f[0] );
00353 dst->f[1] = ceilf( src->f[1] );
00354 dst->f[2] = ceilf( src->f[2] );
00355 dst->f[3] = ceilf( src->f[3] );
00356 }
00357
00358 static void
00359 micro_cos(
00360 union tgsi_exec_channel *dst,
00361 const union tgsi_exec_channel *src )
00362 {
00363 dst->f[0] = cosf( src->f[0] );
00364 dst->f[1] = cosf( src->f[1] );
00365 dst->f[2] = cosf( src->f[2] );
00366 dst->f[3] = cosf( src->f[3] );
00367 }
00368
00369 static void
00370 micro_ddx(
00371 union tgsi_exec_channel *dst,
00372 const union tgsi_exec_channel *src )
00373 {
00374 dst->f[0] =
00375 dst->f[1] =
00376 dst->f[2] =
00377 dst->f[3] = src->f[TILE_BOTTOM_RIGHT] - src->f[TILE_BOTTOM_LEFT];
00378 }
00379
00380 static void
00381 micro_ddy(
00382 union tgsi_exec_channel *dst,
00383 const union tgsi_exec_channel *src )
00384 {
00385 dst->f[0] =
00386 dst->f[1] =
00387 dst->f[2] =
00388 dst->f[3] = src->f[TILE_TOP_LEFT] - src->f[TILE_BOTTOM_LEFT];
00389 }
00390
00391 static void
00392 micro_div(
00393 union tgsi_exec_channel *dst,
00394 const union tgsi_exec_channel *src0,
00395 const union tgsi_exec_channel *src1 )
00396 {
00397 if (src1->f[0] != 0) {
00398 dst->f[0] = src0->f[0] / src1->f[0];
00399 }
00400 if (src1->f[1] != 0) {
00401 dst->f[1] = src0->f[1] / src1->f[1];
00402 }
00403 if (src1->f[2] != 0) {
00404 dst->f[2] = src0->f[2] / src1->f[2];
00405 }
00406 if (src1->f[3] != 0) {
00407 dst->f[3] = src0->f[3] / src1->f[3];
00408 }
00409 }
00410
00411 static void
00412 micro_udiv(
00413 union tgsi_exec_channel *dst,
00414 const union tgsi_exec_channel *src0,
00415 const union tgsi_exec_channel *src1 )
00416 {
00417 dst->u[0] = src0->u[0] / src1->u[0];
00418 dst->u[1] = src0->u[1] / src1->u[1];
00419 dst->u[2] = src0->u[2] / src1->u[2];
00420 dst->u[3] = src0->u[3] / src1->u[3];
00421 }
00422
00423 static void
00424 micro_eq(
00425 union tgsi_exec_channel *dst,
00426 const union tgsi_exec_channel *src0,
00427 const union tgsi_exec_channel *src1,
00428 const union tgsi_exec_channel *src2,
00429 const union tgsi_exec_channel *src3 )
00430 {
00431 dst->f[0] = src0->f[0] == src1->f[0] ? src2->f[0] : src3->f[0];
00432 dst->f[1] = src0->f[1] == src1->f[1] ? src2->f[1] : src3->f[1];
00433 dst->f[2] = src0->f[2] == src1->f[2] ? src2->f[2] : src3->f[2];
00434 dst->f[3] = src0->f[3] == src1->f[3] ? src2->f[3] : src3->f[3];
00435 }
00436
00437 static void
00438 micro_ieq(
00439 union tgsi_exec_channel *dst,
00440 const union tgsi_exec_channel *src0,
00441 const union tgsi_exec_channel *src1,
00442 const union tgsi_exec_channel *src2,
00443 const union tgsi_exec_channel *src3 )
00444 {
00445 dst->i[0] = src0->i[0] == src1->i[0] ? src2->i[0] : src3->i[0];
00446 dst->i[1] = src0->i[1] == src1->i[1] ? src2->i[1] : src3->i[1];
00447 dst->i[2] = src0->i[2] == src1->i[2] ? src2->i[2] : src3->i[2];
00448 dst->i[3] = src0->i[3] == src1->i[3] ? src2->i[3] : src3->i[3];
00449 }
00450
00451 static void
00452 micro_exp2(
00453 union tgsi_exec_channel *dst,
00454 const union tgsi_exec_channel *src)
00455 {
00456 #if FAST_MATH
00457 dst->f[0] = util_fast_exp2( src->f[0] );
00458 dst->f[1] = util_fast_exp2( src->f[1] );
00459 dst->f[2] = util_fast_exp2( src->f[2] );
00460 dst->f[3] = util_fast_exp2( src->f[3] );
00461 #else
00462 dst->f[0] = powf( 2.0f, src->f[0] );
00463 dst->f[1] = powf( 2.0f, src->f[1] );
00464 dst->f[2] = powf( 2.0f, src->f[2] );
00465 dst->f[3] = powf( 2.0f, src->f[3] );
00466 #endif
00467 }
00468
00469 static void
00470 micro_f2ut(
00471 union tgsi_exec_channel *dst,
00472 const union tgsi_exec_channel *src )
00473 {
00474 dst->u[0] = (uint) src->f[0];
00475 dst->u[1] = (uint) src->f[1];
00476 dst->u[2] = (uint) src->f[2];
00477 dst->u[3] = (uint) src->f[3];
00478 }
00479
00480 static void
00481 micro_float_clamp(union tgsi_exec_channel *dst,
00482 const union tgsi_exec_channel *src)
00483 {
00484 uint i;
00485
00486 for (i = 0; i < 4; i++) {
00487 if (src->f[i] > 0.0f) {
00488 if (src->f[i] > 1.884467e+019f)
00489 dst->f[i] = 1.884467e+019f;
00490 else if (src->f[i] < 5.42101e-020f)
00491 dst->f[i] = 5.42101e-020f;
00492 else
00493 dst->f[i] = src->f[i];
00494 }
00495 else {
00496 if (src->f[i] < -1.884467e+019f)
00497 dst->f[i] = -1.884467e+019f;
00498 else if (src->f[i] > -5.42101e-020f)
00499 dst->f[i] = -5.42101e-020f;
00500 else
00501 dst->f[i] = src->f[i];
00502 }
00503 }
00504 }
00505
00506 static void
00507 micro_flr(
00508 union tgsi_exec_channel *dst,
00509 const union tgsi_exec_channel *src )
00510 {
00511 dst->f[0] = floorf( src->f[0] );
00512 dst->f[1] = floorf( src->f[1] );
00513 dst->f[2] = floorf( src->f[2] );
00514 dst->f[3] = floorf( src->f[3] );
00515 }
00516
00517 static void
00518 micro_frc(
00519 union tgsi_exec_channel *dst,
00520 const union tgsi_exec_channel *src )
00521 {
00522 dst->f[0] = src->f[0] - floorf( src->f[0] );
00523 dst->f[1] = src->f[1] - floorf( src->f[1] );
00524 dst->f[2] = src->f[2] - floorf( src->f[2] );
00525 dst->f[3] = src->f[3] - floorf( src->f[3] );
00526 }
00527
00528 static void
00529 micro_i2f(
00530 union tgsi_exec_channel *dst,
00531 const union tgsi_exec_channel *src )
00532 {
00533 dst->f[0] = (float) src->i[0];
00534 dst->f[1] = (float) src->i[1];
00535 dst->f[2] = (float) src->i[2];
00536 dst->f[3] = (float) src->i[3];
00537 }
00538
00539 static void
00540 micro_lg2(
00541 union tgsi_exec_channel *dst,
00542 const union tgsi_exec_channel *src )
00543 {
00544 #if FAST_MATH
00545 dst->f[0] = util_fast_log2( src->f[0] );
00546 dst->f[1] = util_fast_log2( src->f[1] );
00547 dst->f[2] = util_fast_log2( src->f[2] );
00548 dst->f[3] = util_fast_log2( src->f[3] );
00549 #else
00550 dst->f[0] = logf( src->f[0] ) * 1.442695f;
00551 dst->f[1] = logf( src->f[1] ) * 1.442695f;
00552 dst->f[2] = logf( src->f[2] ) * 1.442695f;
00553 dst->f[3] = logf( src->f[3] ) * 1.442695f;
00554 #endif
00555 }
00556
00557 static void
00558 micro_le(
00559 union tgsi_exec_channel *dst,
00560 const union tgsi_exec_channel *src0,
00561 const union tgsi_exec_channel *src1,
00562 const union tgsi_exec_channel *src2,
00563 const union tgsi_exec_channel *src3 )
00564 {
00565 dst->f[0] = src0->f[0] <= src1->f[0] ? src2->f[0] : src3->f[0];
00566 dst->f[1] = src0->f[1] <= src1->f[1] ? src2->f[1] : src3->f[1];
00567 dst->f[2] = src0->f[2] <= src1->f[2] ? src2->f[2] : src3->f[2];
00568 dst->f[3] = src0->f[3] <= src1->f[3] ? src2->f[3] : src3->f[3];
00569 }
00570
00571 static void
00572 micro_lt(
00573 union tgsi_exec_channel *dst,
00574 const union tgsi_exec_channel *src0,
00575 const union tgsi_exec_channel *src1,
00576 const union tgsi_exec_channel *src2,
00577 const union tgsi_exec_channel *src3 )
00578 {
00579 dst->f[0] = src0->f[0] < src1->f[0] ? src2->f[0] : src3->f[0];
00580 dst->f[1] = src0->f[1] < src1->f[1] ? src2->f[1] : src3->f[1];
00581 dst->f[2] = src0->f[2] < src1->f[2] ? src2->f[2] : src3->f[2];
00582 dst->f[3] = src0->f[3] < src1->f[3] ? src2->f[3] : src3->f[3];
00583 }
00584
00585 static void
00586 micro_ilt(
00587 union tgsi_exec_channel *dst,
00588 const union tgsi_exec_channel *src0,
00589 const union tgsi_exec_channel *src1,
00590 const union tgsi_exec_channel *src2,
00591 const union tgsi_exec_channel *src3 )
00592 {
00593 dst->i[0] = src0->i[0] < src1->i[0] ? src2->i[0] : src3->i[0];
00594 dst->i[1] = src0->i[1] < src1->i[1] ? src2->i[1] : src3->i[1];
00595 dst->i[2] = src0->i[2] < src1->i[2] ? src2->i[2] : src3->i[2];
00596 dst->i[3] = src0->i[3] < src1->i[3] ? src2->i[3] : src3->i[3];
00597 }
00598
00599 static void
00600 micro_ult(
00601 union tgsi_exec_channel *dst,
00602 const union tgsi_exec_channel *src0,
00603 const union tgsi_exec_channel *src1,
00604 const union tgsi_exec_channel *src2,
00605 const union tgsi_exec_channel *src3 )
00606 {
00607 dst->u[0] = src0->u[0] < src1->u[0] ? src2->u[0] : src3->u[0];
00608 dst->u[1] = src0->u[1] < src1->u[1] ? src2->u[1] : src3->u[1];
00609 dst->u[2] = src0->u[2] < src1->u[2] ? src2->u[2] : src3->u[2];
00610 dst->u[3] = src0->u[3] < src1->u[3] ? src2->u[3] : src3->u[3];
00611 }
00612
00613 static void
00614 micro_max(
00615 union tgsi_exec_channel *dst,
00616 const union tgsi_exec_channel *src0,
00617 const union tgsi_exec_channel *src1 )
00618 {
00619 dst->f[0] = src0->f[0] > src1->f[0] ? src0->f[0] : src1->f[0];
00620 dst->f[1] = src0->f[1] > src1->f[1] ? src0->f[1] : src1->f[1];
00621 dst->f[2] = src0->f[2] > src1->f[2] ? src0->f[2] : src1->f[2];
00622 dst->f[3] = src0->f[3] > src1->f[3] ? src0->f[3] : src1->f[3];
00623 }
00624
00625 static void
00626 micro_imax(
00627 union tgsi_exec_channel *dst,
00628 const union tgsi_exec_channel *src0,
00629 const union tgsi_exec_channel *src1 )
00630 {
00631 dst->i[0] = src0->i[0] > src1->i[0] ? src0->i[0] : src1->i[0];
00632 dst->i[1] = src0->i[1] > src1->i[1] ? src0->i[1] : src1->i[1];
00633 dst->i[2] = src0->i[2] > src1->i[2] ? src0->i[2] : src1->i[2];
00634 dst->i[3] = src0->i[3] > src1->i[3] ? src0->i[3] : src1->i[3];
00635 }
00636
00637 static void
00638 micro_umax(
00639 union tgsi_exec_channel *dst,
00640 const union tgsi_exec_channel *src0,
00641 const union tgsi_exec_channel *src1 )
00642 {
00643 dst->u[0] = src0->u[0] > src1->u[0] ? src0->u[0] : src1->u[0];
00644 dst->u[1] = src0->u[1] > src1->u[1] ? src0->u[1] : src1->u[1];
00645 dst->u[2] = src0->u[2] > src1->u[2] ? src0->u[2] : src1->u[2];
00646 dst->u[3] = src0->u[3] > src1->u[3] ? src0->u[3] : src1->u[3];
00647 }
00648
00649 static void
00650 micro_min(
00651 union tgsi_exec_channel *dst,
00652 const union tgsi_exec_channel *src0,
00653 const union tgsi_exec_channel *src1 )
00654 {
00655 dst->f[0] = src0->f[0] < src1->f[0] ? src0->f[0] : src1->f[0];
00656 dst->f[1] = src0->f[1] < src1->f[1] ? src0->f[1] : src1->f[1];
00657 dst->f[2] = src0->f[2] < src1->f[2] ? src0->f[2] : src1->f[2];
00658 dst->f[3] = src0->f[3] < src1->f[3] ? src0->f[3] : src1->f[3];
00659 }
00660
00661 static void
00662 micro_imin(
00663 union tgsi_exec_channel *dst,
00664 const union tgsi_exec_channel *src0,
00665 const union tgsi_exec_channel *src1 )
00666 {
00667 dst->i[0] = src0->i[0] < src1->i[0] ? src0->i[0] : src1->i[0];
00668 dst->i[1] = src0->i[1] < src1->i[1] ? src0->i[1] : src1->i[1];
00669 dst->i[2] = src0->i[2] < src1->i[2] ? src0->i[2] : src1->i[2];
00670 dst->i[3] = src0->i[3] < src1->i[3] ? src0->i[3] : src1->i[3];
00671 }
00672
00673 static void
00674 micro_umin(
00675 union tgsi_exec_channel *dst,
00676 const union tgsi_exec_channel *src0,
00677 const union tgsi_exec_channel *src1 )
00678 {
00679 dst->u[0] = src0->u[0] < src1->u[0] ? src0->u[0] : src1->u[0];
00680 dst->u[1] = src0->u[1] < src1->u[1] ? src0->u[1] : src1->u[1];
00681 dst->u[2] = src0->u[2] < src1->u[2] ? src0->u[2] : src1->u[2];
00682 dst->u[3] = src0->u[3] < src1->u[3] ? src0->u[3] : src1->u[3];
00683 }
00684
00685 static void
00686 micro_umod(
00687 union tgsi_exec_channel *dst,
00688 const union tgsi_exec_channel *src0,
00689 const union tgsi_exec_channel *src1 )
00690 {
00691 dst->u[0] = src0->u[0] % src1->u[0];
00692 dst->u[1] = src0->u[1] % src1->u[1];
00693 dst->u[2] = src0->u[2] % src1->u[2];
00694 dst->u[3] = src0->u[3] % src1->u[3];
00695 }
00696
00697 static void
00698 micro_mul(
00699 union tgsi_exec_channel *dst,
00700 const union tgsi_exec_channel *src0,
00701 const union tgsi_exec_channel *src1 )
00702 {
00703 dst->f[0] = src0->f[0] * src1->f[0];
00704 dst->f[1] = src0->f[1] * src1->f[1];
00705 dst->f[2] = src0->f[2] * src1->f[2];
00706 dst->f[3] = src0->f[3] * src1->f[3];
00707 }
00708
00709 static void
00710 micro_imul(
00711 union tgsi_exec_channel *dst,
00712 const union tgsi_exec_channel *src0,
00713 const union tgsi_exec_channel *src1 )
00714 {
00715 dst->i[0] = src0->i[0] * src1->i[0];
00716 dst->i[1] = src0->i[1] * src1->i[1];
00717 dst->i[2] = src0->i[2] * src1->i[2];
00718 dst->i[3] = src0->i[3] * src1->i[3];
00719 }
00720
00721 static void
00722 micro_imul64(
00723 union tgsi_exec_channel *dst0,
00724 union tgsi_exec_channel *dst1,
00725 const union tgsi_exec_channel *src0,
00726 const union tgsi_exec_channel *src1 )
00727 {
00728 dst1->i[0] = src0->i[0] * src1->i[0];
00729 dst1->i[1] = src0->i[1] * src1->i[1];
00730 dst1->i[2] = src0->i[2] * src1->i[2];
00731 dst1->i[3] = src0->i[3] * src1->i[3];
00732 dst0->i[0] = 0;
00733 dst0->i[1] = 0;
00734 dst0->i[2] = 0;
00735 dst0->i[3] = 0;
00736 }
00737
00738 static void
00739 micro_umul64(
00740 union tgsi_exec_channel *dst0,
00741 union tgsi_exec_channel *dst1,
00742 const union tgsi_exec_channel *src0,
00743 const union tgsi_exec_channel *src1 )
00744 {
00745 dst1->u[0] = src0->u[0] * src1->u[0];
00746 dst1->u[1] = src0->u[1] * src1->u[1];
00747 dst1->u[2] = src0->u[2] * src1->u[2];
00748 dst1->u[3] = src0->u[3] * src1->u[3];
00749 dst0->u[0] = 0;
00750 dst0->u[1] = 0;
00751 dst0->u[2] = 0;
00752 dst0->u[3] = 0;
00753 }
00754
00755 static void
00756 micro_movc(
00757 union tgsi_exec_channel *dst,
00758 const union tgsi_exec_channel *src0,
00759 const union tgsi_exec_channel *src1,
00760 const union tgsi_exec_channel *src2 )
00761 {
00762 dst->u[0] = src0->u[0] ? src1->u[0] : src2->u[0];
00763 dst->u[1] = src0->u[1] ? src1->u[1] : src2->u[1];
00764 dst->u[2] = src0->u[2] ? src1->u[2] : src2->u[2];
00765 dst->u[3] = src0->u[3] ? src1->u[3] : src2->u[3];
00766 }
00767
00768 static void
00769 micro_neg(
00770 union tgsi_exec_channel *dst,
00771 const union tgsi_exec_channel *src )
00772 {
00773 dst->f[0] = -src->f[0];
00774 dst->f[1] = -src->f[1];
00775 dst->f[2] = -src->f[2];
00776 dst->f[3] = -src->f[3];
00777 }
00778
00779 static void
00780 micro_ineg(
00781 union tgsi_exec_channel *dst,
00782 const union tgsi_exec_channel *src )
00783 {
00784 dst->i[0] = -src->i[0];
00785 dst->i[1] = -src->i[1];
00786 dst->i[2] = -src->i[2];
00787 dst->i[3] = -src->i[3];
00788 }
00789
00790 static void
00791 micro_not(
00792 union tgsi_exec_channel *dst,
00793 const union tgsi_exec_channel *src )
00794 {
00795 dst->u[0] = ~src->u[0];
00796 dst->u[1] = ~src->u[1];
00797 dst->u[2] = ~src->u[2];
00798 dst->u[3] = ~src->u[3];
00799 }
00800
00801 static void
00802 micro_or(
00803 union tgsi_exec_channel *dst,
00804 const union tgsi_exec_channel *src0,
00805 const union tgsi_exec_channel *src1 )
00806 {
00807 dst->u[0] = src0->u[0] | src1->u[0];
00808 dst->u[1] = src0->u[1] | src1->u[1];
00809 dst->u[2] = src0->u[2] | src1->u[2];
00810 dst->u[3] = src0->u[3] | src1->u[3];
00811 }
00812
00813 static void
00814 micro_pow(
00815 union tgsi_exec_channel *dst,
00816 const union tgsi_exec_channel *src0,
00817 const union tgsi_exec_channel *src1 )
00818 {
00819 #if FAST_MATH
00820 dst->f[0] = util_fast_pow( src0->f[0], src1->f[0] );
00821 dst->f[1] = util_fast_pow( src0->f[1], src1->f[1] );
00822 dst->f[2] = util_fast_pow( src0->f[2], src1->f[2] );
00823 dst->f[3] = util_fast_pow( src0->f[3], src1->f[3] );
00824 #else
00825 dst->f[0] = powf( src0->f[0], src1->f[0] );
00826 dst->f[1] = powf( src0->f[1], src1->f[1] );
00827 dst->f[2] = powf( src0->f[2], src1->f[2] );
00828 dst->f[3] = powf( src0->f[3], src1->f[3] );
00829 #endif
00830 }
00831
00832 static void
00833 micro_rnd(
00834 union tgsi_exec_channel *dst,
00835 const union tgsi_exec_channel *src )
00836 {
00837 dst->f[0] = floorf( src->f[0] + 0.5f );
00838 dst->f[1] = floorf( src->f[1] + 0.5f );
00839 dst->f[2] = floorf( src->f[2] + 0.5f );
00840 dst->f[3] = floorf( src->f[3] + 0.5f );
00841 }
00842
00843 static void
00844 micro_sgn(
00845 union tgsi_exec_channel *dst,
00846 const union tgsi_exec_channel *src )
00847 {
00848 dst->f[0] = src->f[0] < 0.0f ? -1.0f : src->f[0] > 0.0f ? 1.0f : 0.0f;
00849 dst->f[1] = src->f[1] < 0.0f ? -1.0f : src->f[1] > 0.0f ? 1.0f : 0.0f;
00850 dst->f[2] = src->f[2] < 0.0f ? -1.0f : src->f[2] > 0.0f ? 1.0f : 0.0f;
00851 dst->f[3] = src->f[3] < 0.0f ? -1.0f : src->f[3] > 0.0f ? 1.0f : 0.0f;
00852 }
00853
00854 static void
00855 micro_shl(
00856 union tgsi_exec_channel *dst,
00857 const union tgsi_exec_channel *src0,
00858 const union tgsi_exec_channel *src1 )
00859 {
00860 dst->i[0] = src0->i[0] << src1->i[0];
00861 dst->i[1] = src0->i[1] << src1->i[1];
00862 dst->i[2] = src0->i[2] << src1->i[2];
00863 dst->i[3] = src0->i[3] << src1->i[3];
00864 }
00865
00866 static void
00867 micro_ishr(
00868 union tgsi_exec_channel *dst,
00869 const union tgsi_exec_channel *src0,
00870 const union tgsi_exec_channel *src1 )
00871 {
00872 dst->i[0] = src0->i[0] >> src1->i[0];
00873 dst->i[1] = src0->i[1] >> src1->i[1];
00874 dst->i[2] = src0->i[2] >> src1->i[2];
00875 dst->i[3] = src0->i[3] >> src1->i[3];
00876 }
00877
00878 static void
00879 micro_trunc(
00880 union tgsi_exec_channel *dst,
00881 const union tgsi_exec_channel *src0 )
00882 {
00883 dst->f[0] = (float) (int) src0->f[0];
00884 dst->f[1] = (float) (int) src0->f[1];
00885 dst->f[2] = (float) (int) src0->f[2];
00886 dst->f[3] = (float) (int) src0->f[3];
00887 }
00888
00889 static void
00890 micro_ushr(
00891 union tgsi_exec_channel *dst,
00892 const union tgsi_exec_channel *src0,
00893 const union tgsi_exec_channel *src1 )
00894 {
00895 dst->u[0] = src0->u[0] >> src1->u[0];
00896 dst->u[1] = src0->u[1] >> src1->u[1];
00897 dst->u[2] = src0->u[2] >> src1->u[2];
00898 dst->u[3] = src0->u[3] >> src1->u[3];
00899 }
00900
00901 static void
00902 micro_sin(
00903 union tgsi_exec_channel *dst,
00904 const union tgsi_exec_channel *src )
00905 {
00906 dst->f[0] = sinf( src->f[0] );
00907 dst->f[1] = sinf( src->f[1] );
00908 dst->f[2] = sinf( src->f[2] );
00909 dst->f[3] = sinf( src->f[3] );
00910 }
00911
00912 static void
00913 micro_sqrt( union tgsi_exec_channel *dst,
00914 const union tgsi_exec_channel *src )
00915 {
00916 dst->f[0] = sqrtf( src->f[0] );
00917 dst->f[1] = sqrtf( src->f[1] );
00918 dst->f[2] = sqrtf( src->f[2] );
00919 dst->f[3] = sqrtf( src->f[3] );
00920 }
00921
00922 static void
00923 micro_sub(
00924 union tgsi_exec_channel *dst,
00925 const union tgsi_exec_channel *src0,
00926 const union tgsi_exec_channel *src1 )
00927 {
00928 dst->f[0] = src0->f[0] - src1->f[0];
00929 dst->f[1] = src0->f[1] - src1->f[1];
00930 dst->f[2] = src0->f[2] - src1->f[2];
00931 dst->f[3] = src0->f[3] - src1->f[3];
00932 }
00933
00934 static void
00935 micro_u2f(
00936 union tgsi_exec_channel *dst,
00937 const union tgsi_exec_channel *src )
00938 {
00939 dst->f[0] = (float) src->u[0];
00940 dst->f[1] = (float) src->u[1];
00941 dst->f[2] = (float) src->u[2];
00942 dst->f[3] = (float) src->u[3];
00943 }
00944
00945 static void
00946 micro_xor(
00947 union tgsi_exec_channel *dst,
00948 const union tgsi_exec_channel *src0,
00949 const union tgsi_exec_channel *src1 )
00950 {
00951 dst->u[0] = src0->u[0] ^ src1->u[0];
00952 dst->u[1] = src0->u[1] ^ src1->u[1];
00953 dst->u[2] = src0->u[2] ^ src1->u[2];
00954 dst->u[3] = src0->u[3] ^ src1->u[3];
00955 }
00956
00957 static void
00958 fetch_src_file_channel(
00959 const struct tgsi_exec_machine *mach,
00960 const uint file,
00961 const uint swizzle,
00962 const union tgsi_exec_channel *index,
00963 union tgsi_exec_channel *chan )
00964 {
00965 switch( swizzle ) {
00966 case TGSI_EXTSWIZZLE_X:
00967 case TGSI_EXTSWIZZLE_Y:
00968 case TGSI_EXTSWIZZLE_Z:
00969 case TGSI_EXTSWIZZLE_W:
00970 switch( file ) {
00971 case TGSI_FILE_CONSTANT:
00972 assert(mach->Consts);
00973 if (index->i[0] < 0)
00974 chan->f[0] = 0.0f;
00975 else
00976 chan->f[0] = mach->Consts[index->i[0]][swizzle];
00977 if (index->i[1] < 0)
00978 chan->f[1] = 0.0f;
00979 else
00980 chan->f[1] = mach->Consts[index->i[1]][swizzle];
00981 if (index->i[2] < 0)
00982 chan->f[2] = 0.0f;
00983 else
00984 chan->f[2] = mach->Consts[index->i[2]][swizzle];
00985 if (index->i[3] < 0)
00986 chan->f[3] = 0.0f;
00987 else
00988 chan->f[3] = mach->Consts[index->i[3]][swizzle];
00989 break;
00990
00991 case TGSI_FILE_INPUT:
00992 chan->u[0] = mach->Inputs[index->i[0]].xyzw[swizzle].u[0];
00993 chan->u[1] = mach->Inputs[index->i[1]].xyzw[swizzle].u[1];
00994 chan->u[2] = mach->Inputs[index->i[2]].xyzw[swizzle].u[2];
00995 chan->u[3] = mach->Inputs[index->i[3]].xyzw[swizzle].u[3];
00996 break;
00997
00998 case TGSI_FILE_TEMPORARY:
00999 assert(index->i[0] < TGSI_EXEC_NUM_TEMPS);
01000 chan->u[0] = mach->Temps[index->i[0]].xyzw[swizzle].u[0];
01001 chan->u[1] = mach->Temps[index->i[1]].xyzw[swizzle].u[1];
01002 chan->u[2] = mach->Temps[index->i[2]].xyzw[swizzle].u[2];
01003 chan->u[3] = mach->Temps[index->i[3]].xyzw[swizzle].u[3];
01004 break;
01005
01006 case TGSI_FILE_IMMEDIATE:
01007 assert( index->i[0] < (int) mach->ImmLimit );
01008 chan->f[0] = mach->Imms[index->i[0]][swizzle];
01009 assert( index->i[1] < (int) mach->ImmLimit );
01010 chan->f[1] = mach->Imms[index->i[1]][swizzle];
01011 assert( index->i[2] < (int) mach->ImmLimit );
01012 chan->f[2] = mach->Imms[index->i[2]][swizzle];
01013 assert( index->i[3] < (int) mach->ImmLimit );
01014 chan->f[3] = mach->Imms[index->i[3]][swizzle];
01015 break;
01016
01017 case TGSI_FILE_ADDRESS:
01018 chan->u[0] = mach->Addrs[index->i[0]].xyzw[swizzle].u[0];
01019 chan->u[1] = mach->Addrs[index->i[1]].xyzw[swizzle].u[1];
01020 chan->u[2] = mach->Addrs[index->i[2]].xyzw[swizzle].u[2];
01021 chan->u[3] = mach->Addrs[index->i[3]].xyzw[swizzle].u[3];
01022 break;
01023
01024 case TGSI_FILE_OUTPUT:
01025
01026 chan->u[0] = mach->Outputs[index->i[0]].xyzw[swizzle].u[0];
01027 chan->u[1] = mach->Outputs[index->i[1]].xyzw[swizzle].u[1];
01028 chan->u[2] = mach->Outputs[index->i[2]].xyzw[swizzle].u[2];
01029 chan->u[3] = mach->Outputs[index->i[3]].xyzw[swizzle].u[3];
01030 break;
01031
01032 default:
01033 assert( 0 );
01034 }
01035 break;
01036
01037 case TGSI_EXTSWIZZLE_ZERO:
01038 *chan = mach->Temps[TEMP_0_I].xyzw[TEMP_0_C];
01039 break;
01040
01041 case TGSI_EXTSWIZZLE_ONE:
01042 *chan = mach->Temps[TEMP_1_I].xyzw[TEMP_1_C];
01043 break;
01044
01045 default:
01046 assert( 0 );
01047 }
01048 }
01049
01050 static void
01051 fetch_source(
01052 const struct tgsi_exec_machine *mach,
01053 union tgsi_exec_channel *chan,
01054 const struct tgsi_full_src_register *reg,
01055 const uint chan_index )
01056 {
01057 union tgsi_exec_channel index;
01058 uint swizzle;
01059
01060 index.i[0] =
01061 index.i[1] =
01062 index.i[2] =
01063 index.i[3] = reg->SrcRegister.Index;
01064
01065 if (reg->SrcRegister.Indirect) {
01066 union tgsi_exec_channel index2;
01067 union tgsi_exec_channel indir_index;
01068 const uint execmask = mach->ExecMask;
01069 uint i;
01070
01071
01072 index2.i[0] =
01073 index2.i[1] =
01074 index2.i[2] =
01075 index2.i[3] = reg->SrcRegisterInd.Index;
01076
01077
01078 swizzle = tgsi_util_get_src_register_swizzle( ®->SrcRegisterInd, CHAN_X );
01079 fetch_src_file_channel(
01080 mach,
01081 reg->SrcRegisterInd.File,
01082 swizzle,
01083 &index2,
01084 &indir_index );
01085
01086
01087 index.i[0] += (int) indir_index.f[0];
01088 index.i[1] += (int) indir_index.f[1];
01089 index.i[2] += (int) indir_index.f[2];
01090 index.i[3] += (int) indir_index.f[3];
01091
01092
01093
01094
01095 for (i = 0; i < QUAD_SIZE; i++) {
01096 if ((execmask & (1 << i)) == 0)
01097 index.i[i] = 0;
01098 }
01099 }
01100
01101 if( reg->SrcRegister.Dimension ) {
01102 switch( reg->SrcRegister.File ) {
01103 case TGSI_FILE_INPUT:
01104 index.i[0] *= 17;
01105 index.i[1] *= 17;
01106 index.i[2] *= 17;
01107 index.i[3] *= 17;
01108 break;
01109 case TGSI_FILE_CONSTANT:
01110 index.i[0] *= 4096;
01111 index.i[1] *= 4096;
01112 index.i[2] *= 4096;
01113 index.i[3] *= 4096;
01114 break;
01115 default:
01116 assert( 0 );
01117 }
01118
01119 index.i[0] += reg->SrcRegisterDim.Index;
01120 index.i[1] += reg->SrcRegisterDim.Index;
01121 index.i[2] += reg->SrcRegisterDim.Index;
01122 index.i[3] += reg->SrcRegisterDim.Index;
01123
01124 if (reg->SrcRegisterDim.Indirect) {
01125 union tgsi_exec_channel index2;
01126 union tgsi_exec_channel indir_index;
01127 const uint execmask = mach->ExecMask;
01128 uint i;
01129
01130 index2.i[0] =
01131 index2.i[1] =
01132 index2.i[2] =
01133 index2.i[3] = reg->SrcRegisterDimInd.Index;
01134
01135 swizzle = tgsi_util_get_src_register_swizzle( ®->SrcRegisterDimInd, CHAN_X );
01136 fetch_src_file_channel(
01137 mach,
01138 reg->SrcRegisterDimInd.File,
01139 swizzle,
01140 &index2,
01141 &indir_index );
01142
01143 index.i[0] += (int) indir_index.f[0];
01144 index.i[1] += (int) indir_index.f[1];
01145 index.i[2] += (int) indir_index.f[2];
01146 index.i[3] += (int) indir_index.f[3];
01147
01148
01149
01150
01151 for (i = 0; i < QUAD_SIZE; i++) {
01152 if ((execmask & (1 << i)) == 0)
01153 index.i[i] = 0;
01154 }
01155 }
01156 }
01157
01158 swizzle = tgsi_util_get_full_src_register_extswizzle( reg, chan_index );
01159 fetch_src_file_channel(
01160 mach,
01161 reg->SrcRegister.File,
01162 swizzle,
01163 &index,
01164 chan );
01165
01166 switch (tgsi_util_get_full_src_register_sign_mode( reg, chan_index )) {
01167 case TGSI_UTIL_SIGN_CLEAR:
01168 micro_abs( chan, chan );
01169 break;
01170
01171 case TGSI_UTIL_SIGN_SET:
01172 micro_abs( chan, chan );
01173 micro_neg( chan, chan );
01174 break;
01175
01176 case TGSI_UTIL_SIGN_TOGGLE:
01177 micro_neg( chan, chan );
01178 break;
01179
01180 case TGSI_UTIL_SIGN_KEEP:
01181 break;
01182 }
01183
01184 if (reg->SrcRegisterExtMod.Complement) {
01185 micro_sub( chan, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], chan );
01186 }
01187 }
01188
01189 static void
01190 store_dest(
01191 struct tgsi_exec_machine *mach,
01192 const union tgsi_exec_channel *chan,
01193 const struct tgsi_full_dst_register *reg,
01194 const struct tgsi_full_instruction *inst,
01195 uint chan_index )
01196 {
01197 uint i;
01198 union tgsi_exec_channel null;
01199 union tgsi_exec_channel *dst;
01200 uint execmask = mach->ExecMask;
01201
01202 switch (reg->DstRegister.File) {
01203 case TGSI_FILE_NULL:
01204 dst = &null;
01205 break;
01206
01207 case TGSI_FILE_OUTPUT:
01208 dst = &mach->Outputs[mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0]
01209 + reg->DstRegister.Index].xyzw[chan_index];
01210 break;
01211
01212 case TGSI_FILE_TEMPORARY:
01213 assert( reg->DstRegister.Index < TGSI_EXEC_NUM_TEMPS );
01214 dst = &mach->Temps[reg->DstRegister.Index].xyzw[chan_index];
01215 break;
01216
01217 case TGSI_FILE_ADDRESS:
01218 dst = &mach->Addrs[reg->DstRegister.Index].xyzw[chan_index];
01219 break;
01220
01221 default:
01222 assert( 0 );
01223 return;
01224 }
01225
01226 if (inst->InstructionExtNv.CondFlowEnable) {
01227 union tgsi_exec_channel *cc = &mach->Temps[TEMP_CC_I].xyzw[TEMP_CC_C];
01228 uint swizzle;
01229 uint shift;
01230 uint mask;
01231 uint test;
01232
01233
01234
01235 assert( inst->InstructionExtNv.CondFlowIndex < 1 );
01236
01237 switch (chan_index) {
01238 case CHAN_X:
01239 swizzle = inst->InstructionExtNv.CondSwizzleX;
01240 break;
01241 case CHAN_Y:
01242 swizzle = inst->InstructionExtNv.CondSwizzleY;
01243 break;
01244 case CHAN_Z:
01245 swizzle = inst->InstructionExtNv.CondSwizzleZ;
01246 break;
01247 case CHAN_W:
01248 swizzle = inst->InstructionExtNv.CondSwizzleW;
01249 break;
01250 default:
01251 assert( 0 );
01252 return;
01253 }
01254
01255 switch (swizzle) {
01256 case TGSI_SWIZZLE_X:
01257 shift = TGSI_EXEC_CC_X_SHIFT;
01258 mask = TGSI_EXEC_CC_X_MASK;
01259 break;
01260 case TGSI_SWIZZLE_Y:
01261 shift = TGSI_EXEC_CC_Y_SHIFT;
01262 mask = TGSI_EXEC_CC_Y_MASK;
01263 break;
01264 case TGSI_SWIZZLE_Z:
01265 shift = TGSI_EXEC_CC_Z_SHIFT;
01266 mask = TGSI_EXEC_CC_Z_MASK;
01267 break;
01268 case TGSI_SWIZZLE_W:
01269 shift = TGSI_EXEC_CC_W_SHIFT;
01270 mask = TGSI_EXEC_CC_W_MASK;
01271 break;
01272 default:
01273 assert( 0 );
01274 return;
01275 }
01276
01277 switch (inst->InstructionExtNv.CondMask) {
01278 case TGSI_CC_GT:
01279 test = ~(TGSI_EXEC_CC_GT << shift) & mask;
01280 for (i = 0; i < QUAD_SIZE; i++)
01281 if (cc->u[i] & test)
01282 execmask &= ~(1 << i);
01283 break;
01284
01285 case TGSI_CC_EQ:
01286 test = ~(TGSI_EXEC_CC_EQ << shift) & mask;
01287 for (i = 0; i < QUAD_SIZE; i++)
01288 if (cc->u[i] & test)
01289 execmask &= ~(1 << i);
01290 break;
01291
01292 case TGSI_CC_LT:
01293 test = ~(TGSI_EXEC_CC_LT << shift) & mask;
01294 for (i = 0; i < QUAD_SIZE; i++)
01295 if (cc->u[i] & test)
01296 execmask &= ~(1 << i);
01297 break;
01298
01299 case TGSI_CC_GE:
01300 test = ~((TGSI_EXEC_CC_GT | TGSI_EXEC_CC_EQ) << shift) & mask;
01301 for (i = 0; i < QUAD_SIZE; i++)
01302 if (cc->u[i] & test)
01303 execmask &= ~(1 << i);
01304 break;
01305
01306 case TGSI_CC_LE:
01307 test = ~((TGSI_EXEC_CC_LT | TGSI_EXEC_CC_EQ) << shift) & mask;
01308 for (i = 0; i < QUAD_SIZE; i++)
01309 if (cc->u[i] & test)
01310 execmask &= ~(1 << i);
01311 break;
01312
01313 case TGSI_CC_NE:
01314 test = ~((TGSI_EXEC_CC_GT | TGSI_EXEC_CC_LT | TGSI_EXEC_CC_UN) << shift) & mask;
01315 for (i = 0; i < QUAD_SIZE; i++)
01316 if (cc->u[i] & test)
01317 execmask &= ~(1 << i);
01318 break;
01319
01320 case TGSI_CC_TR:
01321 break;
01322
01323 case TGSI_CC_FL:
01324 for (i = 0; i < QUAD_SIZE; i++)
01325 execmask &= ~(1 << i);
01326 break;
01327
01328 default:
01329 assert( 0 );
01330 return;
01331 }
01332 }
01333
01334 switch (inst->Instruction.Saturate) {
01335 case TGSI_SAT_NONE:
01336 for (i = 0; i < QUAD_SIZE; i++)
01337 if (execmask & (1 << i))
01338 dst->i[i] = chan->i[i];
01339 break;
01340
01341 case TGSI_SAT_ZERO_ONE:
01342 for (i = 0; i < QUAD_SIZE; i++)
01343 if (execmask & (1 << i)) {
01344 if (chan->f[i] < 0.0f)
01345 dst->f[i] = 0.0f;
01346 else if (chan->f[i] > 1.0f)
01347 dst->f[i] = 1.0f;
01348 else
01349 dst->i[i] = chan->i[i];
01350 }
01351 break;
01352
01353 case TGSI_SAT_MINUS_PLUS_ONE:
01354 for (i = 0; i < QUAD_SIZE; i++)
01355 if (execmask & (1 << i)) {
01356 if (chan->f[i] < -1.0f)
01357 dst->f[i] = -1.0f;
01358 else if (chan->f[i] > 1.0f)
01359 dst->f[i] = 1.0f;
01360 else
01361 dst->i[i] = chan->i[i];
01362 }
01363 break;
01364
01365 default:
01366 assert( 0 );
01367 }
01368
01369 if (inst->InstructionExtNv.CondDstUpdate) {
01370 union tgsi_exec_channel *cc = &mach->Temps[TEMP_CC_I].xyzw[TEMP_CC_C];
01371 uint shift;
01372 uint mask;
01373
01374
01375
01376 assert( inst->InstructionExtNv.CondDstIndex < 1 );
01377
01378 switch (chan_index) {
01379 case CHAN_X:
01380 shift = TGSI_EXEC_CC_X_SHIFT;
01381 mask = ~TGSI_EXEC_CC_X_MASK;
01382 break;
01383 case CHAN_Y:
01384 shift = TGSI_EXEC_CC_Y_SHIFT;
01385 mask = ~TGSI_EXEC_CC_Y_MASK;
01386 break;
01387 case CHAN_Z:
01388 shift = TGSI_EXEC_CC_Z_SHIFT;
01389 mask = ~TGSI_EXEC_CC_Z_MASK;
01390 break;
01391 case CHAN_W:
01392 shift = TGSI_EXEC_CC_W_SHIFT;
01393 mask = ~TGSI_EXEC_CC_W_MASK;
01394 break;
01395 default:
01396 assert( 0 );
01397 return;
01398 }
01399
01400 for (i = 0; i < QUAD_SIZE; i++)
01401 if (execmask & (1 << i)) {
01402 cc->u[i] &= mask;
01403 if (dst->f[i] < 0.0f)
01404 cc->u[i] |= TGSI_EXEC_CC_LT << shift;
01405 else if (dst->f[i] > 0.0f)
01406 cc->u[i] |= TGSI_EXEC_CC_GT << shift;
01407 else if (dst->f[i] == 0.0f)
01408 cc->u[i] |= TGSI_EXEC_CC_EQ << shift;
01409 else
01410 cc->u[i] |= TGSI_EXEC_CC_UN << shift;
01411 }
01412 }
01413 }
01414
/*
 * Shorthands for reading a source operand channel / writing a destination
 * channel of the current instruction.  Both expect variables named `mach`
 * and `inst` to be in scope at the point of use.
 */
#define FETCH(VAL, INDEX, CHAN) \
   fetch_source( mach, (VAL), &inst->FullSrcRegisters[(INDEX)], (CHAN) )

#define STORE(VAL, INDEX, CHAN) \
   store_dest( mach, (VAL), &inst->FullDstRegisters[(INDEX)], inst, (CHAN) )
01420
01421
01426 static void
01427 exec_kil(struct tgsi_exec_machine *mach,
01428 const struct tgsi_full_instruction *inst)
01429 {
01430 uint uniquemask;
01431 uint chan_index;
01432 uint kilmask = 0;
01433 union tgsi_exec_channel r[1];
01434
01435
01436
01437
01438 uniquemask = (1 << TGSI_EXTSWIZZLE_ZERO) | (1 << TGSI_EXTSWIZZLE_ONE);
01439
01440 for (chan_index = 0; chan_index < 4; chan_index++)
01441 {
01442 uint swizzle;
01443 uint i;
01444
01445
01446 swizzle = tgsi_util_get_full_src_register_extswizzle (
01447 &inst->FullSrcRegisters[0],
01448 chan_index);
01449
01450
01451 if (uniquemask & (1 << swizzle))
01452 continue;
01453 uniquemask |= 1 << swizzle;
01454
01455 FETCH(&r[0], 0, chan_index);
01456 for (i = 0; i < 4; i++)
01457 if (r[0].f[i] < 0.0f)
01458 kilmask |= 1 << i;
01459 }
01460
01461 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask;
01462 }
01463
01468 static void
01469 exec_kilp(struct tgsi_exec_machine *mach,
01470 const struct tgsi_full_instruction *inst)
01471 {
01472 uint kilmask;
01473
01474 if (inst->InstructionExtNv.CondFlowEnable) {
01475 uint swizzle[4];
01476 uint chan_index;
01477
01478 kilmask = 0x0;
01479
01480 swizzle[0] = inst->InstructionExtNv.CondSwizzleX;
01481 swizzle[1] = inst->InstructionExtNv.CondSwizzleY;
01482 swizzle[2] = inst->InstructionExtNv.CondSwizzleZ;
01483 swizzle[3] = inst->InstructionExtNv.CondSwizzleW;
01484
01485 for (chan_index = 0; chan_index < 4; chan_index++)
01486 {
01487 uint i;
01488
01489 for (i = 0; i < 4; i++) {
01490
01491 if (0)
01492 kilmask |= 1 << i;
01493 }
01494 }
01495 }
01496 else {
01497
01498 kilmask = mach->ExecMask;
01499 }
01500 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask;
01501 }
01502
01503
01504
01505
01506
01507 static void
01508 fetch_texel( struct tgsi_sampler *sampler,
01509 const union tgsi_exec_channel *s,
01510 const union tgsi_exec_channel *t,
01511 const union tgsi_exec_channel *p,
01512 float lodbias,
01513 union tgsi_exec_channel *r,
01514 union tgsi_exec_channel *g,
01515 union tgsi_exec_channel *b,
01516 union tgsi_exec_channel *a )
01517 {
01518 uint j;
01519 float rgba[NUM_CHANNELS][QUAD_SIZE];
01520
01521 sampler->get_samples(sampler, s->f, t->f, p->f, lodbias, rgba);
01522
01523 for (j = 0; j < 4; j++) {
01524 r->f[j] = rgba[0][j];
01525 g->f[j] = rgba[1][j];
01526 b->f[j] = rgba[2][j];
01527 a->f[j] = rgba[3][j];
01528 }
01529 }
01530
01531
01532 static void
01533 exec_tex(struct tgsi_exec_machine *mach,
01534 const struct tgsi_full_instruction *inst,
01535 boolean biasLod,
01536 boolean projected)
01537 {
01538 const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index;
01539 union tgsi_exec_channel r[8];
01540 uint chan_index;
01541 float lodBias;
01542
01543
01544
01545 switch (inst->InstructionExtTexture.Texture) {
01546 case TGSI_TEXTURE_1D:
01547
01548 FETCH(&r[0], 0, CHAN_X);
01549
01550 if (projected) {
01551 FETCH(&r[1], 0, CHAN_W);
01552 micro_div( &r[0], &r[0], &r[1] );
01553 }
01554
01555 if (biasLod) {
01556 FETCH(&r[1], 0, CHAN_W);
01557 lodBias = r[2].f[0];
01558 }
01559 else
01560 lodBias = 0.0;
01561
01562 fetch_texel(&mach->Samplers[unit],
01563 &r[0], NULL, NULL, lodBias,
01564 &r[0], &r[1], &r[2], &r[3]);
01565 break;
01566
01567 case TGSI_TEXTURE_2D:
01568 case TGSI_TEXTURE_RECT:
01569
01570 FETCH(&r[0], 0, CHAN_X);
01571 FETCH(&r[1], 0, CHAN_Y);
01572 FETCH(&r[2], 0, CHAN_Z);
01573
01574 if (projected) {
01575 FETCH(&r[3], 0, CHAN_W);
01576 micro_div( &r[0], &r[0], &r[3] );
01577 micro_div( &r[1], &r[1], &r[3] );
01578 micro_div( &r[2], &r[2], &r[3] );
01579 }
01580
01581 if (biasLod) {
01582 FETCH(&r[3], 0, CHAN_W);
01583 lodBias = r[3].f[0];
01584 }
01585 else
01586 lodBias = 0.0;
01587
01588 fetch_texel(&mach->Samplers[unit],
01589 &r[0], &r[1], &r[2], lodBias,
01590 &r[0], &r[1], &r[2], &r[3]);
01591 break;
01592
01593 case TGSI_TEXTURE_3D:
01594 case TGSI_TEXTURE_CUBE:
01595
01596 FETCH(&r[0], 0, CHAN_X);
01597 FETCH(&r[1], 0, CHAN_Y);
01598 FETCH(&r[2], 0, CHAN_Z);
01599
01600 if (projected) {
01601 FETCH(&r[3], 0, CHAN_W);
01602 micro_div( &r[0], &r[0], &r[3] );
01603 micro_div( &r[1], &r[1], &r[3] );
01604 micro_div( &r[2], &r[2], &r[3] );
01605 }
01606
01607 if (biasLod) {
01608 FETCH(&r[3], 0, CHAN_W);
01609 lodBias = r[3].f[0];
01610 }
01611 else
01612 lodBias = 0.0;
01613
01614 fetch_texel(&mach->Samplers[unit],
01615 &r[0], &r[1], &r[2], lodBias,
01616 &r[0], &r[1], &r[2], &r[3]);
01617 break;
01618
01619 default:
01620 assert (0);
01621 }
01622
01623 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
01624 STORE( &r[chan_index], 0, chan_index );
01625 }
01626 }
01627
01628
01633 static void
01634 eval_constant_coef(
01635 struct tgsi_exec_machine *mach,
01636 unsigned attrib,
01637 unsigned chan )
01638 {
01639 unsigned i;
01640
01641 for( i = 0; i < QUAD_SIZE; i++ ) {
01642 mach->Inputs[attrib].xyzw[chan].f[i] = mach->InterpCoefs[attrib].a0[chan];
01643 }
01644 }
01645
01650 static void
01651 eval_linear_coef(
01652 struct tgsi_exec_machine *mach,
01653 unsigned attrib,
01654 unsigned chan )
01655 {
01656 const float x = mach->QuadPos.xyzw[0].f[0];
01657 const float y = mach->QuadPos.xyzw[1].f[0];
01658 const float dadx = mach->InterpCoefs[attrib].dadx[chan];
01659 const float dady = mach->InterpCoefs[attrib].dady[chan];
01660 const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y;
01661 mach->Inputs[attrib].xyzw[chan].f[0] = a0;
01662 mach->Inputs[attrib].xyzw[chan].f[1] = a0 + dadx;
01663 mach->Inputs[attrib].xyzw[chan].f[2] = a0 + dady;
01664 mach->Inputs[attrib].xyzw[chan].f[3] = a0 + dadx + dady;
01665 }
01666
01671 static void
01672 eval_perspective_coef(
01673 struct tgsi_exec_machine *mach,
01674 unsigned attrib,
01675 unsigned chan )
01676 {
01677 const float x = mach->QuadPos.xyzw[0].f[0];
01678 const float y = mach->QuadPos.xyzw[1].f[0];
01679 const float dadx = mach->InterpCoefs[attrib].dadx[chan];
01680 const float dady = mach->InterpCoefs[attrib].dady[chan];
01681 const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y;
01682 const float *w = mach->QuadPos.xyzw[3].f;
01683
01684 mach->Inputs[attrib].xyzw[chan].f[0] = a0 / w[0];
01685 mach->Inputs[attrib].xyzw[chan].f[1] = (a0 + dadx) / w[1];
01686 mach->Inputs[attrib].xyzw[chan].f[2] = (a0 + dady) / w[2];
01687 mach->Inputs[attrib].xyzw[chan].f[3] = (a0 + dadx + dady) / w[3];
01688 }
01689
01690
/*
 * Signature shared by the eval_*_coef() interpolators: evaluate channel
 * `chan` of input attribute `attrib` for the current quad.
 */
typedef void (*eval_coef_func)( struct tgsi_exec_machine *mach,
                                unsigned attrib,
                                unsigned chan );
01695
01696 static void
01697 exec_declaration(
01698 struct tgsi_exec_machine *mach,
01699 const struct tgsi_full_declaration *decl )
01700 {
01701 if( mach->Processor == TGSI_PROCESSOR_FRAGMENT ) {
01702 if( decl->Declaration.File == TGSI_FILE_INPUT ) {
01703 unsigned first, last, mask;
01704 eval_coef_func eval;
01705
01706 first = decl->DeclarationRange.First;
01707 last = decl->DeclarationRange.Last;
01708 mask = decl->Declaration.UsageMask;
01709
01710 switch( decl->Declaration.Interpolate ) {
01711 case TGSI_INTERPOLATE_CONSTANT:
01712 eval = eval_constant_coef;
01713 break;
01714
01715 case TGSI_INTERPOLATE_LINEAR:
01716 eval = eval_linear_coef;
01717 break;
01718
01719 case TGSI_INTERPOLATE_PERSPECTIVE:
01720 eval = eval_perspective_coef;
01721 break;
01722
01723 default:
01724 assert( 0 );
01725 }
01726
01727 if( mask == TGSI_WRITEMASK_XYZW ) {
01728 unsigned i, j;
01729
01730 for( i = first; i <= last; i++ ) {
01731 for( j = 0; j < NUM_CHANNELS; j++ ) {
01732 eval( mach, i, j );
01733 }
01734 }
01735 }
01736 else {
01737 unsigned i, j;
01738
01739 for( j = 0; j < NUM_CHANNELS; j++ ) {
01740 if( mask & (1 << j) ) {
01741 for( i = first; i <= last; i++ ) {
01742 eval( mach, i, j );
01743 }
01744 }
01745 }
01746 }
01747 }
01748 }
01749 }
01750
01751 static void
01752 exec_instruction(
01753 struct tgsi_exec_machine *mach,
01754 const struct tgsi_full_instruction *inst,
01755 int *pc )
01756 {
01757 uint chan_index;
01758 union tgsi_exec_channel r[10];
01759
01760 (*pc)++;
01761
01762 switch (inst->Instruction.Opcode) {
01763 case TGSI_OPCODE_ARL:
01764 case TGSI_OPCODE_FLOOR:
01765
01766 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
01767 FETCH( &r[0], 0, chan_index );
01768 micro_flr(&r[0], &r[0]);
01769 STORE( &r[0], 0, chan_index );
01770 }
01771 break;
01772
01773 case TGSI_OPCODE_MOV:
01774 case TGSI_OPCODE_SWZ:
01775 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
01776 FETCH( &r[0], 0, chan_index );
01777 STORE( &r[0], 0, chan_index );
01778 }
01779 break;
01780
01781 case TGSI_OPCODE_LIT:
01782 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
01783 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X );
01784 }
01785
01786 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y ) || IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
01787 FETCH( &r[0], 0, CHAN_X );
01788 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
01789 micro_max( &r[0], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
01790 STORE( &r[0], 0, CHAN_Y );
01791 }
01792
01793 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
01794 FETCH( &r[1], 0, CHAN_Y );
01795 micro_max( &r[1], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
01796
01797 FETCH( &r[2], 0, CHAN_W );
01798 micro_min( &r[2], &r[2], &mach->Temps[TEMP_128_I].xyzw[TEMP_128_C] );
01799 micro_max( &r[2], &r[2], &mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C] );
01800 micro_pow( &r[1], &r[1], &r[2] );
01801 micro_lt( &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
01802 STORE( &r[0], 0, CHAN_Z );
01803 }
01804 }
01805
01806 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
01807 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
01808 }
01809 break;
01810
01811 case TGSI_OPCODE_RCP:
01812
01813 FETCH( &r[0], 0, CHAN_X );
01814 micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] );
01815 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
01816 STORE( &r[0], 0, chan_index );
01817 }
01818 break;
01819
01820 case TGSI_OPCODE_RSQ:
01821
01822 FETCH( &r[0], 0, CHAN_X );
01823 micro_sqrt( &r[0], &r[0] );
01824 micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] );
01825 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
01826 STORE( &r[0], 0, chan_index );
01827 }
01828 break;
01829
01830 case TGSI_OPCODE_EXP:
01831 FETCH( &r[0], 0, CHAN_X );
01832 micro_flr( &r[1], &r[0] );
01833 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
01834 micro_exp2( &r[2], &r[1] );
01835 STORE( &r[2], 0, CHAN_X );
01836 }
01837 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
01838 micro_sub( &r[2], &r[0], &r[1] );
01839 STORE( &r[2], 0, CHAN_Y );
01840 }
01841 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
01842 micro_exp2( &r[2], &r[0] );
01843 STORE( &r[2], 0, CHAN_Z );
01844 }
01845 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
01846 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
01847 }
01848 break;
01849
01850 case TGSI_OPCODE_LOG:
01851 FETCH( &r[0], 0, CHAN_X );
01852 micro_abs( &r[2], &r[0] );
01853 micro_lg2( &r[1], &r[2] );
01854 micro_flr( &r[0], &r[1] );
01855 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
01856 STORE( &r[0], 0, CHAN_X );
01857 }
01858 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
01859 micro_exp2( &r[0], &r[0] );
01860 micro_div( &r[0], &r[2], &r[0] );
01861 STORE( &r[0], 0, CHAN_Y );
01862 }
01863 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
01864 STORE( &r[1], 0, CHAN_Z );
01865 }
01866 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
01867 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
01868 }
01869 break;
01870
01871 case TGSI_OPCODE_MUL:
01872 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index )
01873 {
01874 FETCH(&r[0], 0, chan_index);
01875 FETCH(&r[1], 1, chan_index);
01876
01877 micro_mul( &r[0], &r[0], &r[1] );
01878
01879 STORE(&r[0], 0, chan_index);
01880 }
01881 break;
01882
01883 case TGSI_OPCODE_ADD:
01884 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
01885 FETCH( &r[0], 0, chan_index );
01886 FETCH( &r[1], 1, chan_index );
01887 micro_add( &r[0], &r[0], &r[1] );
01888 STORE( &r[0], 0, chan_index );
01889 }
01890 break;
01891
01892 case TGSI_OPCODE_DP3:
01893
01894 FETCH( &r[0], 0, CHAN_X );
01895 FETCH( &r[1], 1, CHAN_X );
01896 micro_mul( &r[0], &r[0], &r[1] );
01897
01898 FETCH( &r[1], 0, CHAN_Y );
01899 FETCH( &r[2], 1, CHAN_Y );
01900 micro_mul( &r[1], &r[1], &r[2] );
01901 micro_add( &r[0], &r[0], &r[1] );
01902
01903 FETCH( &r[1], 0, CHAN_Z );
01904 FETCH( &r[2], 1, CHAN_Z );
01905 micro_mul( &r[1], &r[1], &r[2] );
01906 micro_add( &r[0], &r[0], &r[1] );
01907
01908 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
01909 STORE( &r[0], 0, chan_index );
01910 }
01911 break;
01912
01913 case TGSI_OPCODE_DP4:
01914
01915 FETCH(&r[0], 0, CHAN_X);
01916 FETCH(&r[1], 1, CHAN_X);
01917
01918 micro_mul( &r[0], &r[0], &r[1] );
01919
01920 FETCH(&r[1], 0, CHAN_Y);
01921 FETCH(&r[2], 1, CHAN_Y);
01922
01923 micro_mul( &r[1], &r[1], &r[2] );
01924 micro_add( &r[0], &r[0], &r[1] );
01925
01926 FETCH(&r[1], 0, CHAN_Z);
01927 FETCH(&r[2], 1, CHAN_Z);
01928
01929 micro_mul( &r[1], &r[1], &r[2] );
01930 micro_add( &r[0], &r[0], &r[1] );
01931
01932 FETCH(&r[1], 0, CHAN_W);
01933 FETCH(&r[2], 1, CHAN_W);
01934
01935 micro_mul( &r[1], &r[1], &r[2] );
01936 micro_add( &r[0], &r[0], &r[1] );
01937
01938 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
01939 STORE( &r[0], 0, chan_index );
01940 }
01941 break;
01942
01943 case TGSI_OPCODE_DST:
01944 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
01945 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X );
01946 }
01947
01948 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
01949 FETCH( &r[0], 0, CHAN_Y );
01950 FETCH( &r[1], 1, CHAN_Y);
01951 micro_mul( &r[0], &r[0], &r[1] );
01952 STORE( &r[0], 0, CHAN_Y );
01953 }
01954
01955 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
01956 FETCH( &r[0], 0, CHAN_Z );
01957 STORE( &r[0], 0, CHAN_Z );
01958 }
01959
01960 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
01961 FETCH( &r[0], 1, CHAN_W );
01962 STORE( &r[0], 0, CHAN_W );
01963 }
01964 break;
01965
01966 case TGSI_OPCODE_MIN:
01967 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
01968 FETCH(&r[0], 0, chan_index);
01969 FETCH(&r[1], 1, chan_index);
01970
01971
01972 micro_lt( &r[0], &r[0], &r[1], &r[0], &r[1] );
01973
01974 STORE(&r[0], 0, chan_index);
01975 }
01976 break;
01977
01978 case TGSI_OPCODE_MAX:
01979 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
01980 FETCH(&r[0], 0, chan_index);
01981 FETCH(&r[1], 1, chan_index);
01982
01983
01984 micro_lt( &r[0], &r[0], &r[1], &r[1], &r[0] );
01985
01986 STORE(&r[0], 0, chan_index );
01987 }
01988 break;
01989
01990 case TGSI_OPCODE_SLT:
01991
01992 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
01993 FETCH( &r[0], 0, chan_index );
01994 FETCH( &r[1], 1, chan_index );
01995 micro_lt( &r[0], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
01996 STORE( &r[0], 0, chan_index );
01997 }
01998 break;
01999
02000 case TGSI_OPCODE_SGE:
02001
02002 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
02003 FETCH( &r[0], 0, chan_index );
02004 FETCH( &r[1], 1, chan_index );
02005 micro_le( &r[0], &r[1], &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
02006 STORE( &r[0], 0, chan_index );
02007 }
02008 break;
02009
02010 case TGSI_OPCODE_MAD:
02011
02012 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
02013 FETCH( &r[0], 0, chan_index );
02014 FETCH( &r[1], 1, chan_index );
02015 micro_mul( &r[0], &r[0], &r[1] );
02016 FETCH( &r[1], 2, chan_index );
02017 micro_add( &r[0], &r[0], &r[1] );
02018 STORE( &r[0], 0, chan_index );
02019 }
02020 break;
02021
02022 case TGSI_OPCODE_SUB:
02023 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
02024 FETCH(&r[0], 0, chan_index);
02025 FETCH(&r[1], 1, chan_index);
02026
02027 micro_sub( &r[0], &r[0], &r[1] );
02028
02029 STORE(&r[0], 0, chan_index);
02030 }
02031 break;
02032
02033 case TGSI_OPCODE_LERP:
02034
02035 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
02036 FETCH(&r[0], 0, chan_index);
02037 FETCH(&r[1], 1, chan_index);
02038 FETCH(&r[2], 2, chan_index);
02039
02040 micro_sub( &r[1], &r[1], &r[2] );
02041 micro_mul( &r[0], &r[0], &r[1] );
02042 micro_add( &r[0], &r[0], &r[2] );
02043
02044 STORE(&r[0], 0, chan_index);
02045 }
02046 break;
02047
02048 case TGSI_OPCODE_CND:
02049 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
02050 FETCH(&r[0], 0, chan_index);
02051 FETCH(&r[1], 1, chan_index);
02052 FETCH(&r[2], 2, chan_index);
02053 micro_lt(&r[0], &mach->Temps[TEMP_HALF_I].xyzw[TEMP_HALF_C], &r[2], &r[0], &r[1]);
02054 STORE(&r[0], 0, chan_index);
02055 }
02056 break;
02057
02058 case TGSI_OPCODE_CND0:
02059 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
02060 FETCH(&r[0], 0, chan_index);
02061 FETCH(&r[1], 1, chan_index);
02062 FETCH(&r[2], 2, chan_index);
02063 micro_le(&r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[2], &r[0], &r[1]);
02064 STORE(&r[0], 0, chan_index);
02065 }
02066 break;
02067
02068 case TGSI_OPCODE_DOT2ADD:
02069
02070 FETCH( &r[0], 0, CHAN_X );
02071 FETCH( &r[1], 1, CHAN_X );
02072 micro_mul( &r[0], &r[0], &r[1] );
02073
02074 FETCH( &r[1], 0, CHAN_Y );
02075 FETCH( &r[2], 1, CHAN_Y );
02076 micro_mul( &r[1], &r[1], &r[2] );
02077 micro_add( &r[0], &r[0], &r[1] );
02078
02079 FETCH( &r[2], 2, CHAN_X );
02080 micro_add( &r[0], &r[0], &r[2] );
02081
02082 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
02083 STORE( &r[0], 0, chan_index );
02084 }
02085 break;
02086
02087 case TGSI_OPCODE_INDEX:
02088 assert (0);
02089 break;
02090
02091 case TGSI_OPCODE_NEGATE:
02092 assert (0);
02093 break;
02094
02095 case TGSI_OPCODE_FRAC:
02096
02097 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
02098 FETCH( &r[0], 0, chan_index );
02099 micro_frc( &r[0], &r[0] );
02100 STORE( &r[0], 0, chan_index );
02101 }
02102 break;
02103
02104 case TGSI_OPCODE_CLAMP:
02105 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
02106 FETCH(&r[0], 0, chan_index);
02107 FETCH(&r[1], 1, chan_index);
02108 micro_max(&r[0], &r[0], &r[1]);
02109 FETCH(&r[1], 2, chan_index);
02110 micro_min(&r[0], &r[0], &r[1]);
02111 STORE(&r[0], 0, chan_index);
02112 }
02113 break;
02114
02115 case TGSI_OPCODE_ROUND:
02116 case TGSI_OPCODE_ARR:
02117 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
02118 FETCH( &r[0], 0, chan_index );
02119 micro_rnd( &r[0], &r[0] );
02120 STORE( &r[0], 0, chan_index );
02121 }
02122 break;
02123
02124 case TGSI_OPCODE_EXPBASE2:
02125
02126 FETCH(&r[0], 0, CHAN_X);
02127
02128 #if FAST_MATH
02129 micro_exp2( &r[0], &r[0] );
02130 #else
02131 micro_pow( &r[0], &mach->Temps[TEMP_2_I].xyzw[TEMP_2_C], &r[0] );
02132 #endif
02133
02134 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
02135 STORE( &r[0], 0, chan_index );
02136 }
02137 break;
02138
02139 case TGSI_OPCODE_LOGBASE2:
02140
02141 FETCH( &r[0], 0, CHAN_X );
02142 micro_lg2( &r[0], &r[0] );
02143 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
02144 STORE( &r[0], 0, chan_index );
02145 }
02146 break;
02147
02148 case TGSI_OPCODE_POWER:
02149
02150 FETCH(&r[0], 0, CHAN_X);
02151 FETCH(&r[1], 1, CHAN_X);
02152
02153 micro_pow( &r[0], &r[0], &r[1] );
02154
02155 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
02156 STORE( &r[0], 0, chan_index );
02157 }
02158 break;
02159
02160 case TGSI_OPCODE_CROSSPRODUCT:
02161
02162 FETCH(&r[0], 0, CHAN_Y);
02163 FETCH(&r[1], 1, CHAN_Z);
02164
02165 micro_mul( &r[2], &r[0], &r[1] );
02166
02167 FETCH(&r[3], 0, CHAN_Z);
02168 FETCH(&r[4], 1, CHAN_Y);
02169
02170 micro_mul( &r[5], &r[3], &r[4] );
02171 micro_sub( &r[2], &r[2], &r[5] );
02172
02173 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
02174 STORE( &r[2], 0, CHAN_X );
02175 }
02176
02177 FETCH(&r[2], 1, CHAN_X);
02178
02179 micro_mul( &r[3], &r[3], &r[2] );
02180
02181 FETCH(&r[5], 0, CHAN_X);
02182
02183 micro_mul( &r[1], &r[1], &r[5] );
02184 micro_sub( &r[3], &r[3], &r[1] );
02185
02186 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
02187 STORE( &r[3], 0, CHAN_Y );
02188 }
02189
02190 micro_mul( &r[5], &r[5], &r[4] );
02191 micro_mul( &r[0], &r[0], &r[2] );
02192 micro_sub( &r[5], &r[5], &r[0] );
02193
02194 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
02195 STORE( &r[5], 0, CHAN_Z );
02196 }
02197
02198 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
02199 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
02200 }
02201 break;
02202
02203 case TGSI_OPCODE_MULTIPLYMATRIX:
02204 assert (0);
02205 break;
02206
02207 case TGSI_OPCODE_ABS:
02208 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
02209 FETCH(&r[0], 0, chan_index);
02210
02211 micro_abs( &r[0], &r[0] );
02212
02213 STORE(&r[0], 0, chan_index);
02214 }
02215 break;
02216
02217 case TGSI_OPCODE_RCC:
02218 FETCH(&r[0], 0, CHAN_X);
02219 micro_div(&r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0]);
02220 micro_float_clamp(&r[0], &r[0]);
02221 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
02222 STORE(&r[0], 0, chan_index);
02223 }
02224 break;
02225
02226 case TGSI_OPCODE_DPH:
02227 FETCH(&r[0], 0, CHAN_X);
02228 FETCH(&r[1], 1, CHAN_X);
02229
02230 micro_mul( &r[0], &r[0], &r[1] );
02231
02232 FETCH(&r[1], 0, CHAN_Y);
02233 FETCH(&r[2], 1, CHAN_Y);
02234
02235 micro_mul( &r[1], &r[1], &r[2] );
02236 micro_add( &r[0], &r[0], &r[1] );
02237
02238 FETCH(&r[1], 0, CHAN_Z);
02239 FETCH(&r[2], 1, CHAN_Z);
02240
02241 micro_mul( &r[1], &r[1], &r[2] );
02242 micro_add( &r[0], &r[0], &r[1] );
02243
02244 FETCH(&r[1], 1, CHAN_W);
02245
02246 micro_add( &r[0], &r[0], &r[1] );
02247
02248 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
02249 STORE( &r[0], 0, chan_index );
02250 }
02251 break;
02252
02253 case TGSI_OPCODE_COS:
02254 FETCH(&r[0], 0, CHAN_X);
02255
02256 micro_cos( &r[0], &r[0] );
02257
02258 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
02259 STORE( &r[0], 0, chan_index );
02260 }
02261 break;
02262
02263 case TGSI_OPCODE_DDX:
02264 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
02265 FETCH( &r[0], 0, chan_index );
02266 micro_ddx( &r[0], &r[0] );
02267 STORE( &r[0], 0, chan_index );
02268 }
02269 break;
02270
02271 case TGSI_OPCODE_DDY:
02272 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
02273 FETCH( &r[0], 0, chan_index );
02274 micro_ddy( &r[0], &r[0] );
02275 STORE( &r[0], 0, chan_index );
02276 }
02277 break;
02278
02279 case TGSI_OPCODE_KILP:
02280 exec_kilp (mach, inst);
02281 break;
02282
02283 case TGSI_OPCODE_KIL:
02284 exec_kil (mach, inst);
02285 break;
02286
02287 case TGSI_OPCODE_PK2H:
02288 assert (0);
02289 break;
02290
02291 case TGSI_OPCODE_PK2US:
02292 assert (0);
02293 break;
02294
02295 case TGSI_OPCODE_PK4B:
02296 assert (0);
02297 break;
02298
02299 case TGSI_OPCODE_PK4UB:
02300 assert (0);
02301 break;
02302
02303 case TGSI_OPCODE_RFL:
02304 if (IS_CHANNEL_ENABLED(*inst, CHAN_X) ||
02305 IS_CHANNEL_ENABLED(*inst, CHAN_Y) ||
02306 IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
02307
02308 FETCH(&r[2], 0, CHAN_X);
02309 micro_mul(&r[0], &r[2], &r[2]);
02310 FETCH(&r[4], 0, CHAN_Y);
02311 micro_mul(&r[8], &r[4], &r[4]);
02312 micro_add(&r[0], &r[0], &r[8]);
02313 FETCH(&r[6], 0, CHAN_Z);
02314 micro_mul(&r[8], &r[6], &r[6]);
02315 micro_add(&r[0], &r[0], &r[8]);
02316
02317
02318 FETCH(&r[3], 1, CHAN_X);
02319 micro_mul(&r[1], &r[2], &r[3]);
02320 FETCH(&r[5], 1, CHAN_Y);
02321 micro_mul(&r[8], &r[4], &r[5]);
02322 micro_add(&r[1], &r[1], &r[8]);
02323 FETCH(&r[7], 1, CHAN_Z);
02324 micro_mul(&r[8], &r[6], &r[7]);
02325 micro_add(&r[1], &r[1], &r[8]);
02326
02327
02328 micro_add(&r[1], &r[1], &r[1]);
02329 micro_div(&r[1], &r[1], &r[0]);
02330
02331 if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) {
02332 micro_mul(&r[2], &r[2], &r[1]);
02333 micro_sub(&r[2], &r[2], &r[3]);
02334 STORE(&r[2], 0, CHAN_X);
02335 }
02336 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) {
02337 micro_mul(&r[4], &r[4], &r[1]);
02338 micro_sub(&r[4], &r[4], &r[5]);
02339 STORE(&r[4], 0, CHAN_Y);
02340 }
02341 if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
02342 micro_mul(&r[6], &r[6], &r[1]);
02343 micro_sub(&r[6], &r[6], &r[7]);
02344 STORE(&r[6], 0, CHAN_Z);
02345 }
02346 }
02347 if (IS_CHANNEL_ENABLED(*inst, CHAN_W)) {
02348 STORE(&mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W);
02349 }
02350 break;
02351
02352 case TGSI_OPCODE_SEQ:
02353 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
02354 FETCH( &r[0], 0, chan_index );
02355 FETCH( &r[1], 1, chan_index );
02356 micro_eq( &r[0], &r[0], &r[1],
02357 &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C],
02358 &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
02359 STORE( &r[0], 0, chan_index );
02360 }
02361 break;
02362
02363 case TGSI_OPCODE_SFL:
02364 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
02365 STORE(&mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], 0, chan_index);
02366 }
02367 break;
02368
02369 case TGSI_OPCODE_SGT:
02370 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
02371 FETCH( &r[0], 0, chan_index );
02372 FETCH( &r[1], 1, chan_index );
02373 micro_le( &r[0], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] );
02374 STORE( &r[0], 0, chan_index );
02375 }
02376 break;
02377
02378 case TGSI_OPCODE_SIN:
02379 FETCH( &r[0], 0, CHAN_X );
02380 micro_sin( &r[0], &r[0] );
02381 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
02382 STORE( &r[0], 0, chan_index );
02383 }
02384 break;
02385
02386 case TGSI_OPCODE_SLE:
02387 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
02388 FETCH( &r[0], 0, chan_index );
02389 FETCH( &r[1], 1, chan_index );
02390 micro_le( &r[0], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
02391 STORE( &r[0], 0, chan_index );
02392 }
02393 break;
02394
02395 case TGSI_OPCODE_SNE:
02396 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
02397 FETCH( &r[0], 0, chan_index );
02398 FETCH( &r[1], 1, chan_index );
02399 micro_eq( &r[0], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] );
02400 STORE( &r[0], 0, chan_index );
02401 }
02402 break;
02403
02404 case TGSI_OPCODE_STR:
02405 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
02406 STORE(&mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, chan_index);
02407 }
02408 break;
02409
02410 case TGSI_OPCODE_TEX:
02411
02412
02413
02414 exec_tex(mach, inst, FALSE, FALSE);
02415 break;
02416
02417 case TGSI_OPCODE_TXB:
02418
02419
02420
02421 exec_tex(mach, inst, TRUE, FALSE);
02422 break;
02423
02424 case TGSI_OPCODE_TXD:
02425
02426
02427
02428
02429
02430 assert (0);
02431 break;
02432
02433 case TGSI_OPCODE_TXL:
02434
02435
02436
02437 exec_tex(mach, inst, TRUE, FALSE);
02438 break;
02439
02440 case TGSI_OPCODE_TXP:
02441
02442
02443
02444 exec_tex(mach, inst, FALSE, TRUE);
02445 break;
02446
02447 case TGSI_OPCODE_UP2H:
02448 assert (0);
02449 break;
02450
02451 case TGSI_OPCODE_UP2US:
02452 assert (0);
02453 break;
02454
02455 case TGSI_OPCODE_UP4B:
02456 assert (0);
02457 break;
02458
02459 case TGSI_OPCODE_UP4UB:
02460 assert (0);
02461 break;
02462
02463 case TGSI_OPCODE_X2D:
02464 FETCH(&r[0], 1, CHAN_X);
02465 FETCH(&r[1], 1, CHAN_Y);
02466 if (IS_CHANNEL_ENABLED(*inst, CHAN_X) ||
02467 IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
02468 FETCH(&r[2], 2, CHAN_X);
02469 micro_mul(&r[2], &r[2], &r[0]);
02470 FETCH(&r[3], 2, CHAN_Y);
02471 micro_mul(&r[3], &r[3], &r[1]);
02472 micro_add(&r[2], &r[2], &r[3]);
02473 FETCH(&r[3], 0, CHAN_X);
02474 micro_add(&r[2], &r[2], &r[3]);
02475 if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) {
02476 STORE(&r[2], 0, CHAN_X);
02477 }
02478 if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
02479 STORE(&r[2], 0, CHAN_Z);
02480 }
02481 }
02482 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y) ||
02483 IS_CHANNEL_ENABLED(*inst, CHAN_W)) {
02484 FETCH(&r[2], 2, CHAN_Z);
02485 micro_mul(&r[2], &r[2], &r[0]);
02486 FETCH(&r[3], 2, CHAN_W);
02487 micro_mul(&r[3], &r[3], &r[1]);
02488 micro_add(&r[2], &r[2], &r[3]);
02489 FETCH(&r[3], 0, CHAN_Y);
02490 micro_add(&r[2], &r[2], &r[3]);
02491 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) {
02492 STORE(&r[2], 0, CHAN_Y);
02493 }
02494 if (IS_CHANNEL_ENABLED(*inst, CHAN_W)) {
02495 STORE(&r[2], 0, CHAN_W);
02496 }
02497 }
02498 break;
02499
02500 case TGSI_OPCODE_ARA:
02501 assert (0);
02502 break;
02503
02504 case TGSI_OPCODE_BRA:
02505 assert (0);
02506 break;
02507
02508 case TGSI_OPCODE_CAL:
02509
02510 if (mach->ExecMask) {
02511
02512
02513
02514
02515
02516
02517
02518
02519
02520 mach->CallStack[mach->CallStackTop].CondStackTop = mach->CondStackTop;
02521 mach->CallStack[mach->CallStackTop].LoopStackTop = mach->LoopStackTop;
02522 mach->CallStack[mach->CallStackTop].ContStackTop = mach->ContStackTop;
02523
02524 mach->CallStack[mach->CallStackTop].ReturnAddr = *pc;
02525
02526 mach->CallStackTop++;
02527
02528
02529 assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
02530 mach->CondStack[mach->CondStackTop++] = mach->CondMask;
02531 assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
02532 mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
02533 assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
02534 mach->ContStack[mach->ContStackTop++] = mach->ContMask;
02535 assert(mach->FuncStackTop < TGSI_EXEC_MAX_CALL_NESTING);
02536 mach->FuncStack[mach->FuncStackTop++] = mach->FuncMask;
02537
02538
02539 *pc = inst->InstructionExtLabel.Label;
02540 }
02541 break;
02542
02543 case TGSI_OPCODE_RET:
02544 mach->FuncMask &= ~mach->ExecMask;
02545 UPDATE_EXEC_MASK(mach);
02546
02547 if (mach->FuncMask == 0x0) {
02548
02549
02550 if (mach->CallStackTop == 0) {
02551
02552 *pc = -1;
02553 return;
02554 }
02555
02556 assert(mach->CallStackTop > 0);
02557 mach->CallStackTop--;
02558
02559 mach->CondStackTop = mach->CallStack[mach->CallStackTop].CondStackTop;
02560 mach->CondMask = mach->CondStack[mach->CondStackTop];
02561
02562 mach->LoopStackTop = mach->CallStack[mach->CallStackTop].LoopStackTop;
02563 mach->LoopMask = mach->LoopStack[mach->LoopStackTop];
02564
02565 mach->ContStackTop = mach->CallStack[mach->CallStackTop].ContStackTop;
02566 mach->ContMask = mach->ContStack[mach->ContStackTop];
02567
02568 assert(mach->FuncStackTop > 0);
02569 mach->FuncMask = mach->FuncStack[--mach->FuncStackTop];
02570
02571 *pc = mach->CallStack[mach->CallStackTop].ReturnAddr;
02572
02573 UPDATE_EXEC_MASK(mach);
02574 }
02575 break;
02576
02577 case TGSI_OPCODE_SSG:
02578
02579 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
02580 FETCH( &r[0], 0, chan_index );
02581 micro_sgn( &r[0], &r[0] );
02582 STORE( &r[0], 0, chan_index );
02583 }
02584 break;
02585
02586 case TGSI_OPCODE_CMP:
02587 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
02588 FETCH(&r[0], 0, chan_index);
02589 FETCH(&r[1], 1, chan_index);
02590 FETCH(&r[2], 2, chan_index);
02591
02592 micro_lt( &r[0], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[1], &r[2] );
02593
02594 STORE(&r[0], 0, chan_index);
02595 }
02596 break;
02597
02598 case TGSI_OPCODE_SCS:
02599 if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) || IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) {
02600 FETCH( &r[0], 0, CHAN_X );
02601 if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) {
02602 micro_cos(&r[1], &r[0]);
02603 STORE(&r[1], 0, CHAN_X);
02604 }
02605 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) {
02606 micro_sin(&r[1], &r[0]);
02607 STORE(&r[1], 0, CHAN_Y);
02608 }
02609 }
02610 if( IS_CHANNEL_ENABLED( *inst, CHAN_Z ) ) {
02611 STORE( &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], 0, CHAN_Z );
02612 }
02613 if( IS_CHANNEL_ENABLED( *inst, CHAN_W ) ) {
02614 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
02615 }
02616 break;
02617
02618 case TGSI_OPCODE_NRM:
02619
02620 if(IS_CHANNEL_ENABLED(*inst, CHAN_X) ||
02621 IS_CHANNEL_ENABLED(*inst, CHAN_Y) ||
02622 IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
02623
02624 FETCH(&r[0], 0, CHAN_X);
02625 micro_mul(&r[3], &r[0], &r[0]);
02626 FETCH(&r[1], 0, CHAN_Y);
02627 micro_mul(&r[4], &r[1], &r[1]);
02628 micro_add(&r[3], &r[3], &r[4]);
02629 FETCH(&r[2], 0, CHAN_Z);
02630 micro_mul(&r[4], &r[2], &r[2]);
02631 micro_add(&r[3], &r[3], &r[4]);
02632 micro_sqrt(&r[3], &r[3]);
02633
02634 if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) {
02635 micro_div(&r[0], &r[0], &r[3]);
02636 STORE(&r[0], 0, CHAN_X);
02637 }
02638 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) {
02639 micro_div(&r[1], &r[1], &r[3]);
02640 STORE(&r[1], 0, CHAN_Y);
02641 }
02642 if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
02643 micro_div(&r[2], &r[2], &r[3]);
02644 STORE(&r[2], 0, CHAN_Z);
02645 }
02646 }
02647 if (IS_CHANNEL_ENABLED(*inst, CHAN_W)) {
02648 STORE(&mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W);
02649 }
02650 break;
02651
02652 case TGSI_OPCODE_NRM4:
02653
02654 {
02655 union tgsi_exec_channel tmp, dot;
02656
02657
02658 FETCH( &r[0], 0, CHAN_X );
02659 micro_mul( &tmp, &r[0], &r[0] );
02660
02661 FETCH( &r[1], 0, CHAN_Y );
02662 micro_mul( &dot, &r[1], &r[1] );
02663 micro_add( &tmp, &tmp, &dot );
02664
02665 FETCH( &r[2], 0, CHAN_Z );
02666 micro_mul( &dot, &r[2], &r[2] );
02667 micro_add( &tmp, &tmp, &dot );
02668
02669 FETCH( &r[3], 0, CHAN_W );
02670 micro_mul( &dot, &r[3], &r[3] );
02671 micro_add( &tmp, &tmp, &dot );
02672
02673
02674 micro_sqrt( &tmp, &tmp );
02675 micro_div( &tmp, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &tmp );
02676
02677 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
02678
02679 micro_mul( &r[chan_index], &tmp, &r[chan_index] );
02680 STORE( &r[chan_index], 0, chan_index );
02681 }
02682 }
02683 break;
02684
02685 case TGSI_OPCODE_DIV:
02686 assert( 0 );
02687 break;
02688
02689 case TGSI_OPCODE_DP2:
02690 FETCH( &r[0], 0, CHAN_X );
02691 FETCH( &r[1], 1, CHAN_X );
02692 micro_mul( &r[0], &r[0], &r[1] );
02693
02694 FETCH( &r[1], 0, CHAN_Y );
02695 FETCH( &r[2], 1, CHAN_Y );
02696 micro_mul( &r[1], &r[1], &r[2] );
02697 micro_add( &r[0], &r[0], &r[1] );
02698
02699 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
02700 STORE( &r[0], 0, chan_index );
02701 }
02702 break;
02703
02704 case TGSI_OPCODE_IF:
02705
02706 assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
02707 mach->CondStack[mach->CondStackTop++] = mach->CondMask;
02708 FETCH( &r[0], 0, CHAN_X );
02709
02710 if( ! r[0].f[0] ) {
02711 mach->CondMask &= ~0x1;
02712 }
02713 if( ! r[0].f[1] ) {
02714 mach->CondMask &= ~0x2;
02715 }
02716 if( ! r[0].f[2] ) {
02717 mach->CondMask &= ~0x4;
02718 }
02719 if( ! r[0].f[3] ) {
02720 mach->CondMask &= ~0x8;
02721 }
02722 UPDATE_EXEC_MASK(mach);
02723
02724 break;
02725
02726 case TGSI_OPCODE_ELSE:
02727
02728 {
02729 uint prevMask;
02730 assert(mach->CondStackTop > 0);
02731 prevMask = mach->CondStack[mach->CondStackTop - 1];
02732 mach->CondMask = ~mach->CondMask & prevMask;
02733 UPDATE_EXEC_MASK(mach);
02734
02735 }
02736 break;
02737
02738 case TGSI_OPCODE_ENDIF:
02739
02740 assert(mach->CondStackTop > 0);
02741 mach->CondMask = mach->CondStack[--mach->CondStackTop];
02742 UPDATE_EXEC_MASK(mach);
02743 break;
02744
02745 case TGSI_OPCODE_END:
02746
02747 *pc = -1;
02748 break;
02749
02750 case TGSI_OPCODE_REP:
02751 assert (0);
02752 break;
02753
02754 case TGSI_OPCODE_ENDREP:
02755 assert (0);
02756 break;
02757
02758 case TGSI_OPCODE_PUSHA:
02759 assert (0);
02760 break;
02761
02762 case TGSI_OPCODE_POPA:
02763 assert (0);
02764 break;
02765
02766 case TGSI_OPCODE_CEIL:
02767 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
02768 FETCH( &r[0], 0, chan_index );
02769 micro_ceil( &r[0], &r[0] );
02770 STORE( &r[0], 0, chan_index );
02771 }
02772 break;
02773
02774 case TGSI_OPCODE_I2F:
02775 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
02776 FETCH( &r[0], 0, chan_index );
02777 micro_i2f( &r[0], &r[0] );
02778 STORE( &r[0], 0, chan_index );
02779 }
02780 break;
02781
02782 case TGSI_OPCODE_NOT:
02783 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
02784 FETCH( &r[0], 0, chan_index );
02785 micro_not( &r[0], &r[0] );
02786 STORE( &r[0], 0, chan_index );
02787 }
02788 break;
02789
02790 case TGSI_OPCODE_TRUNC:
02791 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
02792 FETCH( &r[0], 0, chan_index );
02793 micro_trunc( &r[0], &r[0] );
02794 STORE( &r[0], 0, chan_index );
02795 }
02796 break;
02797
02798 case TGSI_OPCODE_SHL:
02799 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
02800 FETCH( &r[0], 0, chan_index );
02801 FETCH( &r[1], 1, chan_index );
02802 micro_shl( &r[0], &r[0], &r[1] );
02803 STORE( &r[0], 0, chan_index );
02804 }
02805 break;
02806
02807 case TGSI_OPCODE_SHR:
02808 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
02809 FETCH( &r[0], 0, chan_index );
02810 FETCH( &r[1], 1, chan_index );
02811 micro_ishr( &r[0], &r[0], &r[1] );
02812 STORE( &r[0], 0, chan_index );
02813 }
02814 break;
02815
02816 case TGSI_OPCODE_AND:
02817 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
02818 FETCH( &r[0], 0, chan_index );
02819 FETCH( &r[1], 1, chan_index );
02820 micro_and( &r[0], &r[0], &r[1] );
02821 STORE( &r[0], 0, chan_index );
02822 }
02823 break;
02824
02825 case TGSI_OPCODE_OR:
02826 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
02827 FETCH( &r[0], 0, chan_index );
02828 FETCH( &r[1], 1, chan_index );
02829 micro_or( &r[0], &r[0], &r[1] );
02830 STORE( &r[0], 0, chan_index );
02831 }
02832 break;
02833
02834 case TGSI_OPCODE_MOD:
02835 assert (0);
02836 break;
02837
02838 case TGSI_OPCODE_XOR:
02839 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
02840 FETCH( &r[0], 0, chan_index );
02841 FETCH( &r[1], 1, chan_index );
02842 micro_xor( &r[0], &r[0], &r[1] );
02843 STORE( &r[0], 0, chan_index );
02844 }
02845 break;
02846
02847 case TGSI_OPCODE_SAD:
02848 assert (0);
02849 break;
02850
02851 case TGSI_OPCODE_TXF:
02852 assert (0);
02853 break;
02854
02855 case TGSI_OPCODE_TXQ:
02856 assert (0);
02857 break;
02858
02859 case TGSI_OPCODE_EMIT:
02860 mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] += 16;
02861 mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]]++;
02862 break;
02863
02864 case TGSI_OPCODE_ENDPRIM:
02865 mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]++;
02866 mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]] = 0;
02867 break;
02868
02869 case TGSI_OPCODE_LOOP:
02870 assert(mach->LoopCounterStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
02871 for (chan_index = 0; chan_index < 3; chan_index++) {
02872 FETCH( &mach->LoopCounterStack[mach->LoopCounterStackTop].xyzw[chan_index], 0, chan_index );
02873 }
02874 STORE( &mach->LoopCounterStack[mach->LoopCounterStackTop].xyzw[CHAN_Y], 0, CHAN_X );
02875 ++mach->LoopCounterStackTop;
02876
02877 case TGSI_OPCODE_BGNLOOP2:
02878
02879 assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
02880 mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
02881 assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
02882 mach->ContStack[mach->ContStackTop++] = mach->ContMask;
02883 assert(mach->LoopLabelStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
02884 mach->LoopLabelStack[mach->LoopLabelStackTop++] = *pc - 1;
02885 break;
02886
02887 case TGSI_OPCODE_ENDLOOP:
02888 assert(mach->LoopCounterStackTop > 0);
02889 micro_sub( &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X],
02890 &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X],
02891 &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] );
02892
02893 if( mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X].f[0] <= 0) {
02894 mach->LoopMask &= ~0x1;
02895 }
02896 if( mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X].f[1] <= 0 ) {
02897 mach->LoopMask &= ~0x2;
02898 }
02899 if( mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X].f[2] <= 0 ) {
02900 mach->LoopMask &= ~0x4;
02901 }
02902 if( mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X].f[3] <= 0 ) {
02903 mach->LoopMask &= ~0x8;
02904 }
02905 micro_add( &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y],
02906 &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y],
02907 &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Z]);
02908 assert(mach->LoopLabelStackTop > 0);
02909 inst = mach->Instructions + mach->LoopLabelStack[mach->LoopLabelStackTop - 1];
02910 STORE( &mach->LoopCounterStack[mach->LoopCounterStackTop].xyzw[CHAN_Y], 0, CHAN_X );
02911
02912 assert(mach->ContStackTop > 0);
02913 mach->ContMask = mach->ContStack[mach->ContStackTop - 1];
02914 UPDATE_EXEC_MASK(mach);
02915 if (mach->ExecMask) {
02916
02917 assert(mach->LoopLabelStackTop > 0);
02918 *pc = mach->LoopLabelStack[mach->LoopLabelStackTop - 1] + 1;
02919 }
02920 else {
02921
02922 assert(mach->LoopStackTop > 0);
02923 mach->LoopMask = mach->LoopStack[--mach->LoopStackTop];
02924
02925 assert(mach->ContStackTop > 0);
02926 mach->ContMask = mach->ContStack[--mach->ContStackTop];
02927 assert(mach->LoopLabelStackTop > 0);
02928 --mach->LoopLabelStackTop;
02929 assert(mach->LoopCounterStackTop > 0);
02930 --mach->LoopCounterStackTop;
02931 }
02932 UPDATE_EXEC_MASK(mach);
02933 break;
02934
02935 case TGSI_OPCODE_ENDLOOP2:
02936
02937 assert(mach->ContStackTop > 0);
02938 mach->ContMask = mach->ContStack[mach->ContStackTop - 1];
02939 UPDATE_EXEC_MASK(mach);
02940 if (mach->ExecMask) {
02941
02942 assert(mach->LoopLabelStackTop > 0);
02943 *pc = mach->LoopLabelStack[mach->LoopLabelStackTop - 1] + 1;
02944 }
02945 else {
02946
02947 assert(mach->LoopStackTop > 0);
02948 mach->LoopMask = mach->LoopStack[--mach->LoopStackTop];
02949
02950 assert(mach->ContStackTop > 0);
02951 mach->ContMask = mach->ContStack[--mach->ContStackTop];
02952 assert(mach->LoopLabelStackTop > 0);
02953 --mach->LoopLabelStackTop;
02954 }
02955 UPDATE_EXEC_MASK(mach);
02956 break;
02957
02958 case TGSI_OPCODE_BRK:
02959
02960 mach->LoopMask &= ~mach->ExecMask;
02961
02962 UPDATE_EXEC_MASK(mach);
02963 break;
02964
02965 case TGSI_OPCODE_CONT:
02966
02967 mach->ContMask &= ~mach->ExecMask;
02968
02969 UPDATE_EXEC_MASK(mach);
02970 break;
02971
02972 case TGSI_OPCODE_BGNSUB:
02973
02974 break;
02975
02976 case TGSI_OPCODE_ENDSUB:
02977
02978 break;
02979
02980 case TGSI_OPCODE_NOISE1:
02981 assert( 0 );
02982 break;
02983
02984 case TGSI_OPCODE_NOISE2:
02985 assert( 0 );
02986 break;
02987
02988 case TGSI_OPCODE_NOISE3:
02989 assert( 0 );
02990 break;
02991
02992 case TGSI_OPCODE_NOISE4:
02993 assert( 0 );
02994 break;
02995
02996 case TGSI_OPCODE_NOP:
02997 break;
02998
02999 default:
03000 assert( 0 );
03001 }
03002 }
03003
03004
03009 uint
03010 tgsi_exec_machine_run( struct tgsi_exec_machine *mach )
03011 {
03012 uint i;
03013 int pc = 0;
03014
03015 mach->CondMask = 0xf;
03016 mach->LoopMask = 0xf;
03017 mach->ContMask = 0xf;
03018 mach->FuncMask = 0xf;
03019 mach->ExecMask = 0xf;
03020
03021 mach->CondStackTop = 0;
03022 mach->LoopStackTop = 0;
03023 mach->ContStackTop = 0;
03024 mach->CallStackTop = 0;
03025
03026 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] = 0;
03027 mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] = 0;
03028
03029 if( mach->Processor == TGSI_PROCESSOR_GEOMETRY ) {
03030 mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0] = 0;
03031 mach->Primitives[0] = 0;
03032 }
03033
03034 for (i = 0; i < QUAD_SIZE; i++) {
03035 mach->Temps[TEMP_CC_I].xyzw[TEMP_CC_C].u[i] =
03036 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_X_SHIFT) |
03037 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_Y_SHIFT) |
03038 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_Z_SHIFT) |
03039 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_W_SHIFT);
03040 }
03041
03042
03043 for (i = 0; i < mach->NumDeclarations; i++) {
03044 exec_declaration( mach, mach->Declarations+i );
03045 }
03046
03047
03048 while (pc != -1) {
03049 assert(pc < (int) mach->NumInstructions);
03050 exec_instruction( mach, mach->Instructions + pc, &pc );
03051 }
03052
03053 assert(mach->CondStackTop == 0);
03054 assert(mach->LoopStackTop == 0);
03055 assert(mach->ContStackTop == 0);
03056 assert(mach->CallStackTop == 0);
03057
03058 #if 0
03059
03060 if (mach->Processor == TGSI_PROCESSOR_FRAGMENT) {
03061
03062
03063
03064 for (i = 0; i < 4; i++)
03065 mach->Outputs[0].xyzw[2].f[i] *= ctx->DrawBuffer->_DepthMaxF;
03066 }
03067 #endif
03068
03069 return ~mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0];
03070 }
03071
03072