00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029 #include <stdarg.h>
00030
00031 #include "i915_reg.h"
00032 #include "i915_context.h"
00033 #include "i915_fpc.h"
00034
00035 #include "pipe/p_shader_tokens.h"
00036 #include "util/u_math.h"
00037 #include "util/u_memory.h"
00038 #include "util/u_string.h"
00039 #include "tgsi/tgsi_parse.h"
00040 #include "tgsi/tgsi_dump.h"
00041
00042 #include "draw/draw_vertex.h"
00043
00044
00049 static unsigned passthrough[] =
00050 {
00051 _3DSTATE_PIXEL_SHADER_PROGRAM | ((2*3)-1),
00052
00053
00054
00055 (D0_DCL |
00056 (REG_TYPE_T << D0_TYPE_SHIFT) |
00057 (T_DIFFUSE << D0_NR_SHIFT) |
00058 D0_CHANNEL_ALL),
00059 0,
00060 0,
00061
00062
00063
00064 (A0_MOV |
00065 (REG_TYPE_OC << A0_DEST_TYPE_SHIFT) |
00066 A0_DEST_CHANNEL_ALL |
00067 (REG_TYPE_T << A0_SRC0_TYPE_SHIFT) |
00068 (T_DIFFUSE << A0_SRC0_NR_SHIFT)),
00069 0x01230000,
00070 0
00071 };
00072
00073
00074
/**
 * Coefficients applied to (x, x^3, x^5, x^7) when approximating sin(x):
 * 1, -1/3!, 1/5!, -1/7!.
 */
static const float sin_constants[4] = {
   1.0f,
   -1.0f / 6,
   1.0f / 120,
   -1.0f / 5040
};
00080
00081
/**
 * Coefficients applied to (1, x^2, x^4, x^6) when approximating cos(x):
 * 1, -1/2!, 1/4!, -1/6!.
 */
static const float cos_constants[4] = {
   1.0f,
   -1.0f / 2,
   1.0f / 24,
   -1.0f / 720
};
00087
00088
00089
00093 static INLINE int
00094 negate(int reg, int x, int y, int z, int w)
00095 {
00096
00097 return reg ^ (((x & 1) << UREG_CHANNEL_X_NEGATE_SHIFT) |
00098 ((y & 1) << UREG_CHANNEL_Y_NEGATE_SHIFT) |
00099 ((z & 1) << UREG_CHANNEL_Z_NEGATE_SHIFT) |
00100 ((w & 1) << UREG_CHANNEL_W_NEGATE_SHIFT));
00101 }
00102
00103
00108 static void
00109 i915_use_passthrough_shader(struct i915_fragment_shader *fs)
00110 {
00111 fs->program = (uint *) MALLOC(sizeof(passthrough));
00112 if (fs->program) {
00113 memcpy(fs->program, passthrough, sizeof(passthrough));
00114 fs->program_len = Elements(passthrough);
00115 }
00116 fs->num_constants = 0;
00117 }
00118
00119
00120 void
00121 i915_program_error(struct i915_fp_compile *p, const char *msg, ...)
00122 {
00123 va_list args;
00124 char buffer[1024];
00125
00126 debug_printf("i915_program_error: ");
00127 va_start( args, msg );
00128 util_vsnprintf( buffer, sizeof(buffer), msg, args );
00129 va_end( args );
00130 debug_printf(buffer);
00131 debug_printf("\n");
00132
00133 p->error = 1;
00134 }
00135
00136
00137
00142 static uint
00143 src_vector(struct i915_fp_compile *p,
00144 const struct tgsi_full_src_register *source)
00145 {
00146 uint index = source->SrcRegister.Index;
00147 uint src, sem_name, sem_ind;
00148
00149 switch (source->SrcRegister.File) {
00150 case TGSI_FILE_TEMPORARY:
00151 if (source->SrcRegister.Index >= I915_MAX_TEMPORARY) {
00152 i915_program_error(p, "Exceeded max temporary reg");
00153 return 0;
00154 }
00155 src = UREG(REG_TYPE_R, index);
00156 break;
00157 case TGSI_FILE_INPUT:
00158
00159
00160
00161
00162
00163
00164
00165
00166
00167
00168 sem_name = p->shader->info.input_semantic_name[index];
00169 sem_ind = p->shader->info.input_semantic_index[index];
00170
00171 switch (sem_name) {
00172 case TGSI_SEMANTIC_POSITION:
00173 debug_printf("SKIP SEM POS\n");
00174
00175
00176
00177
00178 break;
00179 case TGSI_SEMANTIC_COLOR:
00180 if (sem_ind == 0) {
00181 src = i915_emit_decl(p, REG_TYPE_T, T_DIFFUSE, D0_CHANNEL_ALL);
00182 }
00183 else {
00184
00185 assert(sem_ind == 1);
00186 src = i915_emit_decl(p, REG_TYPE_T, T_SPECULAR, D0_CHANNEL_XYZ);
00187 src = swizzle(src, X, Y, Z, ONE);
00188 }
00189 break;
00190 case TGSI_SEMANTIC_FOG:
00191 src = i915_emit_decl(p, REG_TYPE_T, T_FOG_W, D0_CHANNEL_W);
00192 src = swizzle(src, W, W, W, W);
00193 break;
00194 case TGSI_SEMANTIC_GENERIC:
00195
00196 src = i915_emit_decl(p, REG_TYPE_T, T_TEX0 + sem_ind, D0_CHANNEL_ALL);
00197 break;
00198 default:
00199 i915_program_error(p, "Bad source->Index");
00200 return 0;
00201 }
00202 break;
00203
00204 case TGSI_FILE_IMMEDIATE:
00205 assert(index < p->num_immediates);
00206 index = p->immediates_map[index];
00207
00208 case TGSI_FILE_CONSTANT:
00209 src = UREG(REG_TYPE_CONST, index);
00210 break;
00211
00212 default:
00213 i915_program_error(p, "Bad source->File");
00214 return 0;
00215 }
00216
00217 if (source->SrcRegister.Extended) {
00218 src = swizzle(src,
00219 source->SrcRegisterExtSwz.ExtSwizzleX,
00220 source->SrcRegisterExtSwz.ExtSwizzleY,
00221 source->SrcRegisterExtSwz.ExtSwizzleZ,
00222 source->SrcRegisterExtSwz.ExtSwizzleW);
00223 }
00224 else {
00225 src = swizzle(src,
00226 source->SrcRegister.SwizzleX,
00227 source->SrcRegister.SwizzleY,
00228 source->SrcRegister.SwizzleZ,
00229 source->SrcRegister.SwizzleW);
00230 }
00231
00232
00233
00234
00235
00236 {
00237 int nx = source->SrcRegisterExtSwz.NegateX;
00238 int ny = source->SrcRegisterExtSwz.NegateY;
00239 int nz = source->SrcRegisterExtSwz.NegateZ;
00240 int nw = source->SrcRegisterExtSwz.NegateW;
00241 if (source->SrcRegister.Negate) {
00242 nx = !nx;
00243 ny = !ny;
00244 nz = !nz;
00245 nw = !nw;
00246 }
00247 src = negate(src, nx, ny, nz, nw);
00248 }
00249
00250
00251 #if 0
00252
00253
00254 assert(!source->SrcRegisterExtMod.Absolute);
00255 assert(!source->SrcRegisterExtMod.Negate);
00256 #endif
00257 return src;
00258 }
00259
00260
00264 static uint
00265 get_result_vector(struct i915_fp_compile *p,
00266 const struct tgsi_full_dst_register *dest)
00267 {
00268 switch (dest->DstRegister.File) {
00269 case TGSI_FILE_OUTPUT:
00270 {
00271 uint sem_name = p->shader->info.output_semantic_name[dest->DstRegister.Index];
00272 switch (sem_name) {
00273 case TGSI_SEMANTIC_POSITION:
00274 return UREG(REG_TYPE_OD, 0);
00275 case TGSI_SEMANTIC_COLOR:
00276 return UREG(REG_TYPE_OC, 0);
00277 default:
00278 i915_program_error(p, "Bad inst->DstReg.Index/semantics");
00279 return 0;
00280 }
00281 }
00282 case TGSI_FILE_TEMPORARY:
00283 return UREG(REG_TYPE_R, dest->DstRegister.Index);
00284 default:
00285 i915_program_error(p, "Bad inst->DstReg.File");
00286 return 0;
00287 }
00288 }
00289
00290
00294 static uint
00295 get_result_flags(const struct tgsi_full_instruction *inst)
00296 {
00297 const uint writeMask
00298 = inst->FullDstRegisters[0].DstRegister.WriteMask;
00299 uint flags = 0x0;
00300
00301 if (inst->Instruction.Saturate == TGSI_SAT_ZERO_ONE)
00302 flags |= A0_DEST_SATURATE;
00303
00304 if (writeMask & TGSI_WRITEMASK_X)
00305 flags |= A0_DEST_CHANNEL_X;
00306 if (writeMask & TGSI_WRITEMASK_Y)
00307 flags |= A0_DEST_CHANNEL_Y;
00308 if (writeMask & TGSI_WRITEMASK_Z)
00309 flags |= A0_DEST_CHANNEL_Z;
00310 if (writeMask & TGSI_WRITEMASK_W)
00311 flags |= A0_DEST_CHANNEL_W;
00312
00313 return flags;
00314 }
00315
00316
00320 static uint
00321 translate_tex_src_target(struct i915_fp_compile *p, uint tex)
00322 {
00323 switch (tex) {
00324 case TGSI_TEXTURE_1D:
00325 return D0_SAMPLE_TYPE_2D;
00326 case TGSI_TEXTURE_2D:
00327 return D0_SAMPLE_TYPE_2D;
00328 case TGSI_TEXTURE_RECT:
00329 return D0_SAMPLE_TYPE_2D;
00330 case TGSI_TEXTURE_3D:
00331 return D0_SAMPLE_TYPE_VOLUME;
00332 case TGSI_TEXTURE_CUBE:
00333 return D0_SAMPLE_TYPE_CUBE;
00334 default:
00335 i915_program_error(p, "TexSrc type");
00336 return 0;
00337 }
00338 }
00339
00340
00344 static void
00345 emit_tex(struct i915_fp_compile *p,
00346 const struct tgsi_full_instruction *inst,
00347 uint opcode)
00348 {
00349 uint texture = inst->InstructionExtTexture.Texture;
00350 uint unit = inst->FullSrcRegisters[1].SrcRegister.Index;
00351 uint tex = translate_tex_src_target( p, texture );
00352 uint sampler = i915_emit_decl(p, REG_TYPE_S, unit, tex);
00353 uint coord = src_vector( p, &inst->FullSrcRegisters[0]);
00354
00355 i915_emit_texld( p,
00356 get_result_vector( p, &inst->FullDstRegisters[0] ),
00357 get_result_flags( inst ),
00358 sampler,
00359 coord,
00360 opcode);
00361 }
00362
00363
00369 static void
00370 emit_simple_arith(struct i915_fp_compile *p,
00371 const struct tgsi_full_instruction *inst,
00372 uint opcode, uint numArgs)
00373 {
00374 uint arg1, arg2, arg3;
00375
00376 assert(numArgs <= 3);
00377
00378 arg1 = (numArgs < 1) ? 0 : src_vector( p, &inst->FullSrcRegisters[0] );
00379 arg2 = (numArgs < 2) ? 0 : src_vector( p, &inst->FullSrcRegisters[1] );
00380 arg3 = (numArgs < 3) ? 0 : src_vector( p, &inst->FullSrcRegisters[2] );
00381
00382 i915_emit_arith( p,
00383 opcode,
00384 get_result_vector( p, &inst->FullDstRegisters[0]),
00385 get_result_flags( inst ), 0,
00386 arg1,
00387 arg2,
00388 arg3 );
00389 }
00390
00391
00393 static void
00394 emit_simple_arith_swap2(struct i915_fp_compile *p,
00395 const struct tgsi_full_instruction *inst,
00396 uint opcode, uint numArgs)
00397 {
00398 struct tgsi_full_instruction inst2;
00399
00400 assert(numArgs == 2);
00401
00402
00403 inst2 = *inst;
00404 inst2.FullSrcRegisters[0] = inst->FullSrcRegisters[1];
00405 inst2.FullSrcRegisters[1] = inst->FullSrcRegisters[0];
00406
00407 emit_simple_arith(p, &inst2, opcode, numArgs);
00408 }
00409
00410
/* Fallback for toolchains whose headers do not provide M_PI. */
#if !defined(M_PI)
#define M_PI 3.14159265358979323846
#endif
00414
00415
00416
00417
00418
00419
00420
00421
00422
00423
00424 static void
00425 i915_translate_instruction(struct i915_fp_compile *p,
00426 const struct tgsi_full_instruction *inst)
00427 {
00428 uint writemask;
00429 uint src0, src1, src2, flags;
00430 uint tmp = 0;
00431
00432 switch (inst->Instruction.Opcode) {
00433 case TGSI_OPCODE_ABS:
00434 src0 = src_vector(p, &inst->FullSrcRegisters[0]);
00435 i915_emit_arith(p,
00436 A0_MAX,
00437 get_result_vector(p, &inst->FullDstRegisters[0]),
00438 get_result_flags(inst), 0,
00439 src0, negate(src0, 1, 1, 1, 1), 0);
00440 break;
00441
00442 case TGSI_OPCODE_ADD:
00443 emit_simple_arith(p, inst, A0_ADD, 2);
00444 break;
00445
00446 case TGSI_OPCODE_CMP:
00447 src0 = src_vector(p, &inst->FullSrcRegisters[0]);
00448 src1 = src_vector(p, &inst->FullSrcRegisters[1]);
00449 src2 = src_vector(p, &inst->FullSrcRegisters[2]);
00450 i915_emit_arith(p, A0_CMP,
00451 get_result_vector(p, &inst->FullDstRegisters[0]),
00452 get_result_flags(inst),
00453 0, src0, src2, src1);
00454 break;
00455
00456 case TGSI_OPCODE_COS:
00457 src0 = src_vector(p, &inst->FullSrcRegisters[0]);
00458 tmp = i915_get_utemp(p);
00459
00460 i915_emit_arith(p,
00461 A0_MUL,
00462 tmp, A0_DEST_CHANNEL_X, 0,
00463 src0, i915_emit_const1f(p, 1.0f / (float) (M_PI * 2.0)), 0);
00464
00465 i915_emit_arith(p, A0_MOD, tmp, A0_DEST_CHANNEL_X, 0, tmp, 0, 0);
00466
00467
00468
00469 i915_emit_arith(p,
00470 A0_MUL,
00471 tmp, A0_DEST_CHANNEL_X, 0,
00472 tmp, i915_emit_const1f(p, (float) (M_PI * 2.0)), 0);
00473
00474
00475
00476
00477
00478
00479
00480 i915_emit_arith(p,
00481 A0_MUL,
00482 tmp, A0_DEST_CHANNEL_XY, 0,
00483 swizzle(tmp, X, X, ONE, ONE),
00484 swizzle(tmp, X, ONE, ONE, ONE), 0);
00485
00486 i915_emit_arith(p,
00487 A0_MUL,
00488 tmp, A0_DEST_CHANNEL_XYZ, 0,
00489 swizzle(tmp, X, Y, X, ONE),
00490 swizzle(tmp, X, X, ONE, ONE), 0);
00491
00492 i915_emit_arith(p,
00493 A0_MUL,
00494 tmp, A0_DEST_CHANNEL_XYZ, 0,
00495 swizzle(tmp, X, X, Z, ONE),
00496 swizzle(tmp, Z, ONE, ONE, ONE), 0);
00497
00498 i915_emit_arith(p,
00499 A0_DP4,
00500 get_result_vector(p, &inst->FullDstRegisters[0]),
00501 get_result_flags(inst), 0,
00502 swizzle(tmp, ONE, Z, Y, X),
00503 i915_emit_const4fv(p, cos_constants), 0);
00504 break;
00505
00506 case TGSI_OPCODE_DP3:
00507 emit_simple_arith(p, inst, A0_DP3, 2);
00508 break;
00509
00510 case TGSI_OPCODE_DP4:
00511 emit_simple_arith(p, inst, A0_DP4, 2);
00512 break;
00513
00514 case TGSI_OPCODE_DPH:
00515 src0 = src_vector(p, &inst->FullSrcRegisters[0]);
00516 src1 = src_vector(p, &inst->FullSrcRegisters[1]);
00517
00518 i915_emit_arith(p,
00519 A0_DP4,
00520 get_result_vector(p, &inst->FullDstRegisters[0]),
00521 get_result_flags(inst), 0,
00522 swizzle(src0, X, Y, Z, ONE), src1, 0);
00523 break;
00524
00525 case TGSI_OPCODE_DST:
00526 src0 = src_vector(p, &inst->FullSrcRegisters[0]);
00527 src1 = src_vector(p, &inst->FullSrcRegisters[1]);
00528
00529
00530
00531
00532
00533
00534 i915_emit_arith(p,
00535 A0_MUL,
00536 get_result_vector(p, &inst->FullDstRegisters[0]),
00537 get_result_flags(inst), 0,
00538 swizzle(src0, ONE, Y, Z, ONE),
00539 swizzle(src1, ONE, Y, ONE, W), 0);
00540 break;
00541
00542 case TGSI_OPCODE_END:
00543
00544 break;
00545
00546 case TGSI_OPCODE_EX2:
00547 src0 = src_vector(p, &inst->FullSrcRegisters[0]);
00548
00549 i915_emit_arith(p,
00550 A0_EXP,
00551 get_result_vector(p, &inst->FullDstRegisters[0]),
00552 get_result_flags(inst), 0,
00553 swizzle(src0, X, X, X, X), 0, 0);
00554 break;
00555
00556 case TGSI_OPCODE_FLR:
00557 emit_simple_arith(p, inst, A0_FLR, 1);
00558 break;
00559
00560 case TGSI_OPCODE_FRC:
00561 emit_simple_arith(p, inst, A0_FRC, 1);
00562 break;
00563
00564 case TGSI_OPCODE_KIL:
00565
00566 src0 = src_vector(p, &inst->FullSrcRegisters[0]);
00567 tmp = i915_get_utemp(p);
00568
00569 i915_emit_texld(p,
00570 tmp,
00571 A0_DEST_CHANNEL_ALL,
00572 0,
00573 src0,
00574 T0_TEXKILL);
00575 break;
00576
00577 case TGSI_OPCODE_KILP:
00578 assert(0);
00579 break;
00580
00581 case TGSI_OPCODE_LG2:
00582 src0 = src_vector(p, &inst->FullSrcRegisters[0]);
00583
00584 i915_emit_arith(p,
00585 A0_LOG,
00586 get_result_vector(p, &inst->FullDstRegisters[0]),
00587 get_result_flags(inst), 0,
00588 swizzle(src0, X, X, X, X), 0, 0);
00589 break;
00590
00591 case TGSI_OPCODE_LIT:
00592 src0 = src_vector(p, &inst->FullSrcRegisters[0]);
00593 tmp = i915_get_utemp(p);
00594
00595
00596
00597
00598
00599
00600
00601
00602 i915_emit_arith(p, A0_MAX, tmp, A0_DEST_CHANNEL_ALL, 0,
00603 src0, swizzle(src0, ZERO, ZERO, Z, W), 0);
00604
00605 i915_emit_arith(p, A0_LOG, tmp, A0_DEST_CHANNEL_Y, 0,
00606 swizzle(tmp, Y, Y, Y, Y), 0, 0);
00607
00608 i915_emit_arith(p, A0_MUL, tmp, A0_DEST_CHANNEL_Y, 0,
00609 swizzle(tmp, ZERO, Y, ZERO, ZERO),
00610 swizzle(tmp, ZERO, W, ZERO, ZERO), 0);
00611
00612 i915_emit_arith(p, A0_EXP, tmp, A0_DEST_CHANNEL_Y, 0,
00613 swizzle(tmp, Y, Y, Y, Y), 0, 0);
00614
00615 i915_emit_arith(p, A0_CMP,
00616 get_result_vector(p, &inst->FullDstRegisters[0]),
00617 get_result_flags(inst), 0,
00618 negate(swizzle(tmp, ONE, ONE, X, ONE), 0, 0, 1, 0),
00619 swizzle(tmp, ONE, X, ZERO, ONE),
00620 swizzle(tmp, ONE, X, Y, ONE));
00621
00622 break;
00623
00624 case TGSI_OPCODE_LRP:
00625 src0 = src_vector(p, &inst->FullSrcRegisters[0]);
00626 src1 = src_vector(p, &inst->FullSrcRegisters[1]);
00627 src2 = src_vector(p, &inst->FullSrcRegisters[2]);
00628 flags = get_result_flags(inst);
00629 tmp = i915_get_utemp(p);
00630
00631
00632
00633
00634
00635
00636
00637
00638 i915_emit_arith(p, A0_MAD, tmp,
00639 flags & A0_DEST_CHANNEL_ALL, 0, src1, src0, src2);
00640
00641 i915_emit_arith(p, A0_MAD,
00642 get_result_vector(p, &inst->FullDstRegisters[0]),
00643 flags, 0, negate(src2, 1, 1, 1, 1), src0, tmp);
00644 break;
00645
00646 case TGSI_OPCODE_MAD:
00647 emit_simple_arith(p, inst, A0_MAD, 3);
00648 break;
00649
00650 case TGSI_OPCODE_MAX:
00651 emit_simple_arith(p, inst, A0_MAX, 2);
00652 break;
00653
00654 case TGSI_OPCODE_MIN:
00655 src0 = src_vector(p, &inst->FullSrcRegisters[0]);
00656 src1 = src_vector(p, &inst->FullSrcRegisters[1]);
00657 tmp = i915_get_utemp(p);
00658 flags = get_result_flags(inst);
00659
00660 i915_emit_arith(p,
00661 A0_MAX,
00662 tmp, flags & A0_DEST_CHANNEL_ALL, 0,
00663 negate(src0, 1, 1, 1, 1),
00664 negate(src1, 1, 1, 1, 1), 0);
00665
00666 i915_emit_arith(p,
00667 A0_MOV,
00668 get_result_vector(p, &inst->FullDstRegisters[0]),
00669 flags, 0, negate(tmp, 1, 1, 1, 1), 0, 0);
00670 break;
00671
00672 case TGSI_OPCODE_MOV:
00673 case TGSI_OPCODE_SWZ:
00674 emit_simple_arith(p, inst, A0_MOV, 1);
00675 break;
00676
00677 case TGSI_OPCODE_MUL:
00678 emit_simple_arith(p, inst, A0_MUL, 2);
00679 break;
00680
00681 case TGSI_OPCODE_POW:
00682 src0 = src_vector(p, &inst->FullSrcRegisters[0]);
00683 src1 = src_vector(p, &inst->FullSrcRegisters[1]);
00684 tmp = i915_get_utemp(p);
00685 flags = get_result_flags(inst);
00686
00687
00688
00689 i915_emit_arith(p,
00690 A0_LOG,
00691 tmp, A0_DEST_CHANNEL_X, 0,
00692 swizzle(src0, X, X, X, X), 0, 0);
00693
00694 i915_emit_arith(p, A0_MUL, tmp, A0_DEST_CHANNEL_X, 0, tmp, src1, 0);
00695
00696 i915_emit_arith(p,
00697 A0_EXP,
00698 get_result_vector(p, &inst->FullDstRegisters[0]),
00699 flags, 0, swizzle(tmp, X, X, X, X), 0, 0);
00700 break;
00701
00702 case TGSI_OPCODE_RET:
00703
00704 break;
00705
00706 case TGSI_OPCODE_RCP:
00707 src0 = src_vector(p, &inst->FullSrcRegisters[0]);
00708
00709 i915_emit_arith(p,
00710 A0_RCP,
00711 get_result_vector(p, &inst->FullDstRegisters[0]),
00712 get_result_flags(inst), 0,
00713 swizzle(src0, X, X, X, X), 0, 0);
00714 break;
00715
00716 case TGSI_OPCODE_RSQ:
00717 src0 = src_vector(p, &inst->FullSrcRegisters[0]);
00718
00719 i915_emit_arith(p,
00720 A0_RSQ,
00721 get_result_vector(p, &inst->FullDstRegisters[0]),
00722 get_result_flags(inst), 0,
00723 swizzle(src0, X, X, X, X), 0, 0);
00724 break;
00725
00726 case TGSI_OPCODE_SCS:
00727 src0 = src_vector(p, &inst->FullSrcRegisters[0]);
00728 tmp = i915_get_utemp(p);
00729
00730
00731
00732
00733
00734
00735
00736
00737
00738 i915_emit_arith(p,
00739 A0_MUL,
00740 tmp, A0_DEST_CHANNEL_XY, 0,
00741 swizzle(src0, X, X, ONE, ONE),
00742 swizzle(src0, X, ONE, ONE, ONE), 0);
00743
00744 i915_emit_arith(p,
00745 A0_MUL,
00746 tmp, A0_DEST_CHANNEL_ALL, 0,
00747 swizzle(tmp, X, Y, X, Y),
00748 swizzle(tmp, X, X, ONE, ONE), 0);
00749
00750 writemask = inst->FullDstRegisters[0].DstRegister.WriteMask;
00751
00752 if (writemask & TGSI_WRITEMASK_Y) {
00753 uint tmp1;
00754
00755 if (writemask & TGSI_WRITEMASK_X)
00756 tmp1 = i915_get_utemp(p);
00757 else
00758 tmp1 = tmp;
00759
00760 i915_emit_arith(p,
00761 A0_MUL,
00762 tmp1, A0_DEST_CHANNEL_ALL, 0,
00763 swizzle(tmp, X, Y, Y, W),
00764 swizzle(tmp, X, Z, ONE, ONE), 0);
00765
00766 i915_emit_arith(p,
00767 A0_DP4,
00768 get_result_vector(p, &inst->FullDstRegisters[0]),
00769 A0_DEST_CHANNEL_Y, 0,
00770 swizzle(tmp1, W, Z, Y, X),
00771 i915_emit_const4fv(p, sin_constants), 0);
00772 }
00773
00774 if (writemask & TGSI_WRITEMASK_X) {
00775 i915_emit_arith(p,
00776 A0_MUL,
00777 tmp, A0_DEST_CHANNEL_XYZ, 0,
00778 swizzle(tmp, X, X, Z, ONE),
00779 swizzle(tmp, Z, ONE, ONE, ONE), 0);
00780
00781 i915_emit_arith(p,
00782 A0_DP4,
00783 get_result_vector(p, &inst->FullDstRegisters[0]),
00784 A0_DEST_CHANNEL_X, 0,
00785 swizzle(tmp, ONE, Z, Y, X),
00786 i915_emit_const4fv(p, cos_constants), 0);
00787 }
00788 break;
00789
00790 case TGSI_OPCODE_SGE:
00791 emit_simple_arith(p, inst, A0_SGE, 2);
00792 break;
00793
00794 case TGSI_OPCODE_SLE:
00795
00796 emit_simple_arith_swap2(p, inst, A0_SGE, 2);
00797 break;
00798
00799 case TGSI_OPCODE_SIN:
00800 src0 = src_vector(p, &inst->FullSrcRegisters[0]);
00801 tmp = i915_get_utemp(p);
00802
00803 i915_emit_arith(p,
00804 A0_MUL,
00805 tmp, A0_DEST_CHANNEL_X, 0,
00806 src0, i915_emit_const1f(p, 1.0f / (float) (M_PI * 2.0)), 0);
00807
00808 i915_emit_arith(p, A0_MOD, tmp, A0_DEST_CHANNEL_X, 0, tmp, 0, 0);
00809
00810
00811
00812 i915_emit_arith(p,
00813 A0_MUL,
00814 tmp, A0_DEST_CHANNEL_X, 0,
00815 tmp, i915_emit_const1f(p, (float) (M_PI * 2.0)), 0);
00816
00817
00818
00819
00820
00821
00822
00823 i915_emit_arith(p,
00824 A0_MUL,
00825 tmp, A0_DEST_CHANNEL_XY, 0,
00826 swizzle(tmp, X, X, ONE, ONE),
00827 swizzle(tmp, X, ONE, ONE, ONE), 0);
00828
00829 i915_emit_arith(p,
00830 A0_MUL,
00831 tmp, A0_DEST_CHANNEL_ALL, 0,
00832 swizzle(tmp, X, Y, X, Y),
00833 swizzle(tmp, X, X, ONE, ONE), 0);
00834
00835 i915_emit_arith(p,
00836 A0_MUL,
00837 tmp, A0_DEST_CHANNEL_ALL, 0,
00838 swizzle(tmp, X, Y, Y, W),
00839 swizzle(tmp, X, Z, ONE, ONE), 0);
00840
00841 i915_emit_arith(p,
00842 A0_DP4,
00843 get_result_vector(p, &inst->FullDstRegisters[0]),
00844 get_result_flags(inst), 0,
00845 swizzle(tmp, W, Z, Y, X),
00846 i915_emit_const4fv(p, sin_constants), 0);
00847 break;
00848
00849 case TGSI_OPCODE_SLT:
00850 emit_simple_arith(p, inst, A0_SLT, 2);
00851 break;
00852
00853 case TGSI_OPCODE_SGT:
00854
00855 emit_simple_arith_swap2(p, inst, A0_SLT, 2);
00856 break;
00857
00858 case TGSI_OPCODE_SUB:
00859 src0 = src_vector(p, &inst->FullSrcRegisters[0]);
00860 src1 = src_vector(p, &inst->FullSrcRegisters[1]);
00861
00862 i915_emit_arith(p,
00863 A0_ADD,
00864 get_result_vector(p, &inst->FullDstRegisters[0]),
00865 get_result_flags(inst), 0,
00866 src0, negate(src1, 1, 1, 1, 1), 0);
00867 break;
00868
00869 case TGSI_OPCODE_TEX:
00870 emit_tex(p, inst, T0_TEXLD);
00871 break;
00872
00873 case TGSI_OPCODE_TXB:
00874 emit_tex(p, inst, T0_TEXLDB);
00875 break;
00876
00877 case TGSI_OPCODE_TXP:
00878 emit_tex(p, inst, T0_TEXLDP);
00879 break;
00880
00881 case TGSI_OPCODE_XPD:
00882
00883
00884
00885
00886
00887
00888 src0 = src_vector(p, &inst->FullSrcRegisters[0]);
00889 src1 = src_vector(p, &inst->FullSrcRegisters[1]);
00890 tmp = i915_get_utemp(p);
00891
00892 i915_emit_arith(p,
00893 A0_MUL,
00894 tmp, A0_DEST_CHANNEL_ALL, 0,
00895 swizzle(src0, Z, X, Y, ONE),
00896 swizzle(src1, Y, Z, X, ONE), 0);
00897
00898 i915_emit_arith(p,
00899 A0_MAD,
00900 get_result_vector(p, &inst->FullDstRegisters[0]),
00901 get_result_flags(inst), 0,
00902 swizzle(src0, Y, Z, X, ONE),
00903 swizzle(src1, Z, X, Y, ONE),
00904 negate(tmp, 1, 1, 1, 0));
00905 break;
00906
00907 default:
00908 i915_program_error(p, "bad opcode %d", inst->Instruction.Opcode);
00909 p->error = 1;
00910 return;
00911 }
00912
00913 i915_release_utemps(p);
00914 }
00915
00916
00922 static void
00923 i915_translate_instructions(struct i915_fp_compile *p,
00924 const struct tgsi_token *tokens)
00925 {
00926 struct i915_fragment_shader *ifs = p->shader;
00927 struct tgsi_parse_context parse;
00928
00929 tgsi_parse_init( &parse, tokens );
00930
00931 while( !tgsi_parse_end_of_tokens( &parse ) ) {
00932
00933 tgsi_parse_token( &parse );
00934
00935 switch( parse.FullToken.Token.Type ) {
00936 case TGSI_TOKEN_TYPE_DECLARATION:
00937 if (parse.FullToken.FullDeclaration.Declaration.File
00938 == TGSI_FILE_CONSTANT) {
00939 uint i;
00940 for (i = parse.FullToken.FullDeclaration.DeclarationRange.First;
00941 i <= parse.FullToken.FullDeclaration.DeclarationRange.Last;
00942 i++) {
00943 assert(ifs->constant_flags[i] == 0x0);
00944 ifs->constant_flags[i] = I915_CONSTFLAG_USER;
00945 ifs->num_constants = MAX2(ifs->num_constants, i + 1);
00946 }
00947 }
00948 else if (parse.FullToken.FullDeclaration.Declaration.File
00949 == TGSI_FILE_TEMPORARY) {
00950 uint i;
00951 for (i = parse.FullToken.FullDeclaration.DeclarationRange.First;
00952 i <= parse.FullToken.FullDeclaration.DeclarationRange.Last;
00953 i++) {
00954 assert(i < I915_MAX_TEMPORARY);
00955
00956 p->temp_flag |= (1 << i);
00957 }
00958 }
00959 break;
00960
00961 case TGSI_TOKEN_TYPE_IMMEDIATE:
00962 {
00963 const struct tgsi_full_immediate *imm
00964 = &parse.FullToken.FullImmediate;
00965 const uint pos = p->num_immediates++;
00966 uint j;
00967 for (j = 0; j < imm->Immediate.Size; j++) {
00968 p->immediates[pos][j] = imm->u.ImmediateFloat32[j].Float;
00969 }
00970 }
00971 break;
00972
00973 case TGSI_TOKEN_TYPE_INSTRUCTION:
00974 if (p->first_instruction) {
00975
00976 uint i, j;
00977 for (i = 0; i < p->num_immediates; i++) {
00978
00979 for (j = 0; j < I915_MAX_CONSTANT; j++) {
00980 if (ifs->constant_flags[j] == 0x0) {
00981 memcpy(ifs->constants[j],
00982 p->immediates[i],
00983 4 * sizeof(float));
00984
00985 ifs->constant_flags[j] = 0xf;
00986 p->immediates_map[i] = j;
00987 ifs->num_constants = MAX2(ifs->num_constants, j + 1);
00988 break;
00989 }
00990 }
00991 }
00992
00993 p->first_instruction = FALSE;
00994 }
00995
00996 i915_translate_instruction(p, &parse.FullToken.FullInstruction);
00997 break;
00998
00999 default:
01000 assert( 0 );
01001 }
01002
01003 }
01004
01005 tgsi_parse_free (&parse);
01006 }
01007
01008
01009 static struct i915_fp_compile *
01010 i915_init_compile(struct i915_context *i915,
01011 struct i915_fragment_shader *ifs)
01012 {
01013 struct i915_fp_compile *p = CALLOC_STRUCT(i915_fp_compile);
01014
01015 p->shader = ifs;
01016
01017
01018
01019
01020
01021
01022
01023 ifs->num_constants = 0;
01024 memset(ifs->constant_flags, 0, sizeof(ifs->constant_flags));
01025
01026 p->first_instruction = TRUE;
01027
01028 p->nr_tex_indirect = 1;
01029 p->nr_tex_insn = 0;
01030 p->nr_alu_insn = 0;
01031 p->nr_decl_insn = 0;
01032
01033 p->csr = p->program;
01034 p->decl = p->declarations;
01035 p->decl_s = 0;
01036 p->decl_t = 0;
01037 p->temp_flag = ~0x0 << I915_MAX_TEMPORARY;
01038 p->utemp_flag = ~0x7;
01039
01040 p->wpos_tex = -1;
01041
01042
01043 *(p->decl++) = _3DSTATE_PIXEL_SHADER_PROGRAM;
01044
01045 return p;
01046 }
01047
01048
01049
01050
01051
01052 static void
01053 i915_fini_compile(struct i915_context *i915, struct i915_fp_compile *p)
01054 {
01055 struct i915_fragment_shader *ifs = p->shader;
01056 unsigned long program_size = (unsigned long) (p->csr - p->program);
01057 unsigned long decl_size = (unsigned long) (p->decl - p->declarations);
01058
01059 if (p->nr_tex_indirect > I915_MAX_TEX_INDIRECT)
01060 i915_program_error(p, "Exceeded max nr indirect texture lookups");
01061
01062 if (p->nr_tex_insn > I915_MAX_TEX_INSN)
01063 i915_program_error(p, "Exceeded max TEX instructions");
01064
01065 if (p->nr_alu_insn > I915_MAX_ALU_INSN)
01066 i915_program_error(p, "Exceeded max ALU instructions");
01067
01068 if (p->nr_decl_insn > I915_MAX_DECL_INSN)
01069 i915_program_error(p, "Exceeded max DECL instructions");
01070
01071 if (p->error) {
01072 p->NumNativeInstructions = 0;
01073 p->NumNativeAluInstructions = 0;
01074 p->NumNativeTexInstructions = 0;
01075 p->NumNativeTexIndirections = 0;
01076
01077 i915_use_passthrough_shader(ifs);
01078 }
01079 else {
01080 p->NumNativeInstructions
01081 = p->nr_alu_insn + p->nr_tex_insn + p->nr_decl_insn;
01082 p->NumNativeAluInstructions = p->nr_alu_insn;
01083 p->NumNativeTexInstructions = p->nr_tex_insn;
01084 p->NumNativeTexIndirections = p->nr_tex_indirect;
01085
01086
01087 p->declarations[0] |= program_size + decl_size - 2;
01088
01089
01090
01091 assert(!ifs->program);
01092 ifs->program
01093 = (uint *) MALLOC((program_size + decl_size) * sizeof(uint));
01094 if (ifs->program) {
01095 ifs->program_len = program_size + decl_size;
01096
01097 memcpy(ifs->program,
01098 p->declarations,
01099 decl_size * sizeof(uint));
01100
01101 memcpy(ifs->program + decl_size,
01102 p->program,
01103 program_size * sizeof(uint));
01104 }
01105 }
01106
01107
01108
01109 FREE(p);
01110 }
01111
01112
01117 static void
01118 i915_find_wpos_space(struct i915_fp_compile *p)
01119 {
01120 #if 0
01121 const uint inputs
01122 = p->shader->inputs_read | (1 << TGSI_ATTRIB_POS);
01123 uint i;
01124
01125 p->wpos_tex = -1;
01126
01127 if (inputs & (1 << TGSI_ATTRIB_POS)) {
01128 for (i = 0; i < I915_TEX_UNITS; i++) {
01129 if ((inputs & (1 << (TGSI_ATTRIB_TEX0 + i))) == 0) {
01130 p->wpos_tex = i;
01131 return;
01132 }
01133 }
01134
01135 i915_program_error(p, "No free texcoord for wpos value");
01136 }
01137 #else
01138 if (p->shader->info.input_semantic_name[0] == TGSI_SEMANTIC_POSITION) {
01139
01140 #if 0
01141 assert(0);
01142 #endif
01143 }
01144 #endif
01145 }
01146
01147
01148
01149
01155 static void
01156 i915_fixup_depth_write(struct i915_fp_compile *p)
01157 {
01158
01159 if (p->shader->info.output_semantic_name[0] == TGSI_SEMANTIC_POSITION) {
01160 const uint depth = UREG(REG_TYPE_OD, 0);
01161
01162 i915_emit_arith(p,
01163 A0_MOV,
01164 depth,
01165 A0_DEST_CHANNEL_W,
01166 0,
01167 swizzle(depth, X, Y, Z, Z),
01168 0, 0 );
01169 }
01170 }
01171
01172
01173 void
01174 i915_translate_fragment_program( struct i915_context *i915,
01175 struct i915_fragment_shader *fs)
01176 {
01177 struct i915_fp_compile *p = i915_init_compile(i915, fs);
01178 const struct tgsi_token *tokens = fs->state.tokens;
01179
01180 i915_find_wpos_space(p);
01181
01182 #if 0
01183 tgsi_dump(tokens, 0);
01184 #endif
01185
01186 i915_translate_instructions(p, tokens);
01187 i915_fixup_depth_write(p);
01188
01189 i915_fini_compile(i915, p);
01190 }