Go to the source code of this file.
Defines | |
#define | M_PI 3.14159265358979323846 |
Functions | |
static int | negate (int reg, int x, int y, int z, int w) |
component-wise negation of ureg | |
static void | i915_use_passthrough_shader (struct i915_fragment_shader *fs) |
In the event of a translation failure, we'll generate a simple color pass-through program. | |
void | i915_program_error (struct i915_fp_compile *p, const char *msg,...) |
static uint | src_vector (struct i915_fp_compile *p, const struct tgsi_full_src_register *source) |
Construct a ureg for the given source register. | |
static uint | get_result_vector (struct i915_fp_compile *p, const struct tgsi_full_dst_register *dest) |
Construct a ureg for a destination register. | |
static uint | get_result_flags (const struct tgsi_full_instruction *inst) |
Compute flags for saturation and writemask. | |
static uint | translate_tex_src_target (struct i915_fp_compile *p, uint tex) |
Convert TGSI_TEXTURE_x token to D0_SAMPLE_TYPE_x token. | |
static void | emit_tex (struct i915_fp_compile *p, const struct tgsi_full_instruction *inst, uint opcode) |
Generate texel lookup instruction. | |
static void | emit_simple_arith (struct i915_fp_compile *p, const struct tgsi_full_instruction *inst, uint opcode, uint numArgs) |
Generate a simple arithmetic instruction. | |
static void | emit_simple_arith_swap2 (struct i915_fp_compile *p, const struct tgsi_full_instruction *inst, uint opcode, uint numArgs) |
As above, but swap the first two src regs. | |
static void | i915_translate_instruction (struct i915_fp_compile *p, const struct tgsi_full_instruction *inst) |
static void | i915_translate_instructions (struct i915_fp_compile *p, const struct tgsi_token *tokens) |
Translate TGSI fragment shader into i915 hardware instructions. | |
static struct i915_fp_compile * | i915_init_compile (struct i915_context *i915, struct i915_fragment_shader *ifs) |
static void | i915_fini_compile (struct i915_context *i915, struct i915_fp_compile *p) |
static void | i915_find_wpos_space (struct i915_fp_compile *p) |
Find an unused texture coordinate slot to use for fragment WPOS. | |
static void | i915_fixup_depth_write (struct i915_fp_compile *p) |
Rather than trying to intercept and jiggle depth writes during emit, just move the value into its correct position at the end of the program. | |
void | i915_translate_fragment_program (struct i915_context *i915, struct i915_fragment_shader *fs) |
Variables | |
static unsigned | passthrough [] |
Simple pass-through fragment shader to use when we don't have a real shader (or it fails to compile for some reason). | |
static const float | sin_constants [4] |
static const float | cos_constants [4] |
#define M_PI 3.14159265358979323846 |
Definition at line 473 of file i915_fpc_translate.c.
static void emit_simple_arith | ( | struct i915_fp_compile * | p, | |
const struct tgsi_full_instruction * | inst, | |||
uint | opcode, | |||
uint | numArgs | |||
) | [static] |
Generate a simple arithmetic instruction.
opcode | the i915 opcode | |
numArgs | the number of input/src arguments |
Definition at line 431 of file i915_fpc_translate.c.
References A0_ADD, A0_CMP, A0_MAX, tgsi_full_instruction::FullDstRegisters, tgsi_full_instruction::FullSrcRegisters, get_result_flags(), get_result_vector(), i915_emit_arith(), negate(), src_vector(), TGSI_OPCODE_ABS, TGSI_OPCODE_ADD, and TGSI_OPCODE_CMP.
00432 { 00433 case TGSI_OPCODE_ABS: 00434 src0 = src_vector(p, &inst->FullSrcRegisters[0]); 00435 i915_emit_arith(p, 00436 A0_MAX, 00437 get_result_vector(p, &inst->FullDstRegisters[0]), 00438 get_result_flags(inst), 0, 00439 src0, negate(src0, 1, 1, 1, 1), 0); 00440 break; 00441 00442 case TGSI_OPCODE_ADD: 00443 emit_simple_arith(p, inst, A0_ADD, 2); 00444 break; 00445 00446 case TGSI_OPCODE_CMP: 00447 src0 = src_vector(p, &inst->FullSrcRegisters[0]); 00448 src1 = src_vector(p, &inst->FullSrcRegisters[1]); 00449 src2 = src_vector(p, &inst->FullSrcRegisters[2]); 00450 i915_emit_arith(p, A0_CMP,
static void emit_simple_arith_swap2 | ( | struct i915_fp_compile * | p, | |
const struct tgsi_full_instruction * | inst, | |||
uint | opcode, | |||
uint | numArgs | |||
) | [static] |
As above, but swap the first two src regs.
Definition at line 455 of file i915_fpc_translate.c.
00456 : 00457 src0 = src_vector(p, &inst->FullSrcRegisters[0]); 00458 tmp = i915_get_utemp(p); 00459 00460 i915_emit_arith(p, 00461 A0_MUL, 00462 tmp, A0_DEST_CHANNEL_X, 0, 00463 src0, i915_emit_const1f(p, 1.0f / (float) (M_PI * 2.0)), 0); 00464 00465 i915_emit_arith(p, A0_MOD, tmp, A0_DEST_CHANNEL_X, 0, tmp, 0, 0); 00466 00467 /* By choosing different taylor constants, could get rid of this mul: 00468 */ 00469 i915_emit_arith(p, A0_MUL,
static void emit_tex | ( | struct i915_fp_compile * | p, | |
const struct tgsi_full_instruction * | inst, | |||
uint | opcode | |||
) | [static] |
Generate texel lookup instruction.
Definition at line 406 of file i915_fpc_translate.c.
00418 : 00419 * 00420 * SIN, COS -- could use another taylor step? 00421 * LIT -- results seem a little different to sw mesa 00422 * LOG -- different to mesa on negative numbers, but this is conformant. */
static uint get_result_flags | ( | const struct tgsi_full_instruction * | inst | ) | [static] |
Compute flags for saturation and writemask.
Definition at line 356 of file i915_fpc_translate.c.
00373 { 00374 uint arg1, arg2, arg3; 00375
static uint get_result_vector | ( | struct i915_fp_compile * | p, | |
const struct tgsi_full_dst_register * | dest | |||
) | [static] |
Construct a ureg for a destination register.
Definition at line 319 of file i915_fpc_translate.c.
00322 { 00323 switch (tex) { 00324 case TGSI_TEXTURE_1D: 00325 return D0_SAMPLE_TYPE_2D; 00326 case TGSI_TEXTURE_2D: 00327 return D0_SAMPLE_TYPE_2D; 00328 case TGSI_TEXTURE_RECT: 00329 return D0_SAMPLE_TYPE_2D; 00330 case TGSI_TEXTURE_3D: 00331 return D0_SAMPLE_TYPE_VOLUME; 00332 case TGSI_TEXTURE_CUBE: 00333 return D0_SAMPLE_TYPE_CUBE; 00334 default: 00335 i915_program_error(p, "TexSrc type"); 00336 return 0; 00337 } 00338 } 00339 00340 00344 static void 00345 emit_tex(struct i915_fp_compile *p, 00346 const struct tgsi_full_instruction *inst, 00347 uint opcode) 00348 { 00349 uint texture = inst->InstructionExtTexture.Texture;
static void i915_find_wpos_space | ( | struct i915_fp_compile * | p | ) | [static] |
Find an unused texture coordinate slot to use for fragment WPOS.
Update p->wpos_tex with the result (-1 if no unused texcoord slot is found).
Definition at line 1192 of file i915_fpc_translate.c.
static void i915_fini_compile | ( | struct i915_context * | i915, | |
struct i915_fp_compile * | p | |||
) | [static] |
Definition at line 1127 of file i915_fpc_translate.c.
References i915_program_error(), I915_TEX_UNITS, and i915_fp_compile::wpos_tex.
01127 { 01128 for (i = 0; i < I915_TEX_UNITS; i++) { 01129 if ((inputs & (1 << (TGSI_ATTRIB_TEX0 + i))) == 0) { 01130 p->wpos_tex = i; 01131 return; 01132 } 01133 } 01134 01135 i915_program_error(p, "No free texcoord for wpos value"); 01136 } 01137 #else 01138 if (p->shader->info.input_semantic_name[0] == TGSI_SEMANTIC_POSITION) { 01139 /* frag shader using the fragment position input */ 01140 #if 0 01141 assert(0); 01142 #endif 01143 } 01144 #endif 01145 } 01146 01147 01148 01149 01155 static void 01156 i915_fixup_depth_write(struct i915_fp_compile *p) 01157 { 01158 /* XXX assuming pos/depth is always in output[0] */ 01159 if (p->shader->info.output_semantic_name[0] == TGSI_SEMANTIC_POSITION) { 01160 const uint depth = UREG(REG_TYPE_OD, 0); 01161 01162 i915_emit_arith(p, 01163 A0_MOV, /* opcode */ 01164 depth, /* dest reg */ 01165 A0_DEST_CHANNEL_W, /* write mask */ 01166 0, /* saturate? */ 01167 swizzle(depth, X, Y, Z, Z), /* src0 */ 01168 0, 0 /* src1, src2 */); 01169 } 01170 } 01171 01172 01173 void 01174 i915_translate_fragment_program( struct i915_context *i915, 01175 struct i915_fragment_shader *fs) 01176 { 01177 struct i915_fp_compile *p = i915_init_compile(i915, fs); 01178 const struct tgsi_token *tokens = fs->state.tokens; 01179 01180 i915_find_wpos_space(p); 01181 01182 #if 0 01183 tgsi_dump(tokens, 0); 01184 #endif
static void i915_fixup_depth_write | ( | struct i915_fp_compile * | p | ) | [static] |
Rather than trying to intercept and jiggle depth writes during emit, just move the value into its correct position at the end of the program.
Definition at line 1230 of file i915_fpc_translate.c.
static struct i915_fp_compile* i915_init_compile | ( | struct i915_context * | i915, | |
struct i915_fragment_shader * | ifs | |||
) | [static, read] |
Definition at line 1084 of file i915_fpc_translate.c.
01089 : 01090 */ 01091 assert(!ifs->program); 01092 ifs->program 01093 = (uint *) MALLOC((program_size + decl_size) * sizeof(uint)); 01094 if (ifs->program) { 01095 ifs->program_len = program_size + decl_size; 01096 01097 memcpy(ifs->program, 01098 p->declarations, 01099 decl_size * sizeof(uint)); 01100 01101 memcpy(ifs->program + decl_size, 01102 p->program, 01103 program_size * sizeof(uint)); 01104 } 01105 } 01106 01107 /* Release the compilation struct: 01108 */ 01109 FREE(p); 01110 } 01111 01112 01117 static void 01118 i915_find_wpos_space(struct i915_fp_compile *p) 01119 { 01120 #if 0
void i915_program_error | ( | struct i915_fp_compile * | p, | |
const char * | msg, | |||
... | ||||
) |
Definition at line 137 of file i915_fpc_translate.c.
References tgsi_src_register::File, I915_MAX_TEMPORARY, i915_program_error(), tgsi_src_register::Index, REG_TYPE_R, tgsi_full_src_register::SrcRegister, TGSI_FILE_INPUT, TGSI_FILE_TEMPORARY, and UREG.
00145 { 00146 uint index = source->SrcRegister.Index; 00147 uint src, sem_name, sem_ind; 00148 00149 switch (source->SrcRegister.File) { 00150 case TGSI_FILE_TEMPORARY:
void i915_translate_fragment_program | ( | struct i915_context * | i915, | |
struct i915_fragment_shader * | fs | |||
) |
Definition at line 1251 of file i915_fpc_translate.c.
static void i915_translate_instruction | ( | struct i915_fp_compile * | p, | |
const struct tgsi_full_instruction * | inst | |||
) | [static] |
Definition at line 486 of file i915_fpc_translate.c.
00506 : 00507 emit_simple_arith(p, inst, A0_DP3, 2); 00508 break; 00509 00510 case TGSI_OPCODE_DP4: 00511 emit_simple_arith(p, inst, A0_DP4, 2); 00512 break; 00513 00514 case TGSI_OPCODE_DPH: 00515 src0 = src_vector(p, &inst->FullSrcRegisters[0]); 00516 src1 = src_vector(p, &inst->FullSrcRegisters[1]); 00517 00518 i915_emit_arith(p, 00519 A0_DP4, 00520 get_result_vector(p, &inst->FullDstRegisters[0]), 00521 get_result_flags(inst), 0, 00522 swizzle(src0, X, Y, Z, ONE), src1, 0); 00523 break; 00524 00525 case TGSI_OPCODE_DST: 00526 src0 = src_vector(p, &inst->FullSrcRegisters[0]); 00527 src1 = src_vector(p, &inst->FullSrcRegisters[1]); 00528 00529 /* result[0] = 1 * 1; 00530 * result[1] = a[1] * b[1]; 00531 * result[2] = a[2] * 1; 00532 * result[3] = 1 * b[3]; 00533 */ 00534 i915_emit_arith(p, 00535 A0_MUL, 00536 get_result_vector(p, &inst->FullDstRegisters[0]), 00537 get_result_flags(inst), 0, 00538 swizzle(src0, ONE, Y, Z, ONE), 00539 swizzle(src1, ONE, Y, ONE, W), 0); 00540 break; 00541 00542 case TGSI_OPCODE_END: 00543 /* no-op */ 00544 break; 00545 00546 case TGSI_OPCODE_EX2: 00547 src0 = src_vector(p, &inst->FullSrcRegisters[0]); 00548 00549 i915_emit_arith(p, 00550 A0_EXP, 00551 get_result_vector(p, &inst->FullDstRegisters[0]), 00552 get_result_flags(inst), 0, 00553 swizzle(src0, X, X, X, X), 0, 0); 00554 break; 00555 00556 case TGSI_OPCODE_FLR: 00557 emit_simple_arith(p, inst, A0_FLR, 1); 00558 break; 00559 00560 case TGSI_OPCODE_FRC: 00561 emit_simple_arith(p, inst, A0_FRC, 1); 00562 break; 00563 00564 case TGSI_OPCODE_KIL: 00565 /* kill if src[0].x < 0 || src[0].y < 0 ... 
*/ 00566 src0 = src_vector(p, &inst->FullSrcRegisters[0]); 00567 tmp = i915_get_utemp(p); 00568 00569 i915_emit_texld(p, 00570 tmp, /* dest reg: a dummy reg */ 00571 A0_DEST_CHANNEL_ALL, /* dest writemask */ 00572 0, /* sampler */ 00573 src0, /* coord*/ 00574 T0_TEXKILL); /* opcode */ 00575 break; 00576 00577 case TGSI_OPCODE_KILP: 00578 assert(0); /* not tested yet */ 00579 break; 00580 00581 case TGSI_OPCODE_LG2: 00582 src0 = src_vector(p, &inst->FullSrcRegisters[0]); 00583 00584 i915_emit_arith(p, 00585 A0_LOG, 00586 get_result_vector(p, &inst->FullDstRegisters[0]), 00587 get_result_flags(inst), 0, 00588 swizzle(src0, X, X, X, X), 0, 0); 00589 break; 00590 00591 case TGSI_OPCODE_LIT: 00592 src0 = src_vector(p, &inst->FullSrcRegisters[0]); 00593 tmp = i915_get_utemp(p); 00594 00595 /* tmp = max( a.xyzw, a.00zw ) 00596 * XXX: Clamp tmp.w to -128..128 00597 * tmp.y = log(tmp.y) 00598 * tmp.y = tmp.w * tmp.y 00599 * tmp.y = exp(tmp.y) 00600 * result = cmp (a.11-x1, a.1x01, a.1xy1 ) 00601 */ 00602 i915_emit_arith(p, A0_MAX, tmp, A0_DEST_CHANNEL_ALL, 0, 00603 src0, swizzle(src0, ZERO, ZERO, Z, W), 0); 00604 00605 i915_emit_arith(p, A0_LOG, tmp, A0_DEST_CHANNEL_Y, 0, 00606 swizzle(tmp, Y, Y, Y, Y), 0, 0); 00607 00608 i915_emit_arith(p, A0_MUL, tmp, A0_DEST_CHANNEL_Y, 0, 00609 swizzle(tmp, ZERO, Y, ZERO, ZERO), 00610 swizzle(tmp, ZERO, W, ZERO, ZERO), 0); 00611 00612 i915_emit_arith(p, A0_EXP, tmp, A0_DEST_CHANNEL_Y, 0, 00613 swizzle(tmp, Y, Y, Y, Y), 0, 0); 00614 00615 i915_emit_arith(p, A0_CMP, 00616 get_result_vector(p, &inst->FullDstRegisters[0]), 00617 get_result_flags(inst), 0, 00618 negate(swizzle(tmp, ONE, ONE, X, ONE), 0, 0, 1, 0), 00619 swizzle(tmp, ONE, X, ZERO, ONE), 00620 swizzle(tmp, ONE, X, Y, ONE)); 00621 00622 break; 00623 00624 case TGSI_OPCODE_LRP: 00625 src0 = src_vector(p, &inst->FullSrcRegisters[0]); 00626 src1 = src_vector(p, &inst->FullSrcRegisters[1]); 00627 src2 = src_vector(p, &inst->FullSrcRegisters[2]); 00628 flags = get_result_flags(inst); 
00629 tmp = i915_get_utemp(p); 00630 00631 /* b*a + c*(1-a) 00632 * 00633 * b*a + c - ca 00634 * 00635 * tmp = b*a + c, 00636 * result = (-c)*a + tmp 00637 */ 00638 i915_emit_arith(p, A0_MAD, tmp, 00639 flags & A0_DEST_CHANNEL_ALL, 0, src1, src0, src2); 00640 00641 i915_emit_arith(p, A0_MAD, 00642 get_result_vector(p, &inst->FullDstRegisters[0]), 00643 flags, 0, negate(src2, 1, 1, 1, 1), src0, tmp); 00644 break; 00645 00646 case TGSI_OPCODE_MAD: 00647 emit_simple_arith(p, inst, A0_MAD, 3); 00648 break; 00649 00650 case TGSI_OPCODE_MAX: 00651 emit_simple_arith(p, inst, A0_MAX, 2); 00652 break; 00653 00654 case TGSI_OPCODE_MIN: 00655 src0 = src_vector(p, &inst->FullSrcRegisters[0]); 00656 src1 = src_vector(p, &inst->FullSrcRegisters[1]); 00657 tmp = i915_get_utemp(p); 00658 flags = get_result_flags(inst); 00659 00660 i915_emit_arith(p, 00661 A0_MAX, 00662 tmp, flags & A0_DEST_CHANNEL_ALL, 0, 00663 negate(src0, 1, 1, 1, 1), 00664 negate(src1, 1, 1, 1, 1), 0); 00665 00666 i915_emit_arith(p, 00667 A0_MOV, 00668 get_result_vector(p, &inst->FullDstRegisters[0]), 00669 flags, 0, negate(tmp, 1, 1, 1, 1), 0, 0); 00670 break; 00671 00672 case TGSI_OPCODE_MOV: 00673 case TGSI_OPCODE_SWZ: 00674 emit_simple_arith(p, inst, A0_MOV, 1); 00675 break; 00676 00677 case TGSI_OPCODE_MUL: 00678 emit_simple_arith(p, inst, A0_MUL, 2); 00679 break; 00680 00681 case TGSI_OPCODE_POW: 00682 src0 = src_vector(p, &inst->FullSrcRegisters[0]); 00683 src1 = src_vector(p, &inst->FullSrcRegisters[1]); 00684 tmp = i915_get_utemp(p); 00685 flags = get_result_flags(inst); 00686 00687 /* XXX: masking on intermediate values, here and elsewhere. 
00688 */ 00689 i915_emit_arith(p, 00690 A0_LOG, 00691 tmp, A0_DEST_CHANNEL_X, 0, 00692 swizzle(src0, X, X, X, X), 0, 0); 00693 00694 i915_emit_arith(p, A0_MUL, tmp, A0_DEST_CHANNEL_X, 0, tmp, src1, 0); 00695 00696 i915_emit_arith(p, 00697 A0_EXP, 00698 get_result_vector(p, &inst->FullDstRegisters[0]), 00699 flags, 0, swizzle(tmp, X, X, X, X), 0, 0); 00700 break; 00701 00702 case TGSI_OPCODE_RET: 00703 /* XXX: no-op? */ 00704 break; 00705 00706 case TGSI_OPCODE_RCP: 00707 src0 = src_vector(p, &inst->FullSrcRegisters[0]); 00708 00709 i915_emit_arith(p, 00710 A0_RCP, 00711 get_result_vector(p, &inst->FullDstRegisters[0]), 00712 get_result_flags(inst), 0, 00713 swizzle(src0, X, X, X, X), 0, 0); 00714 break; 00715 00716 case TGSI_OPCODE_RSQ: 00717 src0 = src_vector(p, &inst->FullSrcRegisters[0]); 00718 00719 i915_emit_arith(p, 00720 A0_RSQ, 00721 get_result_vector(p, &inst->FullDstRegisters[0]), 00722 get_result_flags(inst), 0, 00723 swizzle(src0, X, X, X, X), 0, 0); 00724 break; 00725 00726 case TGSI_OPCODE_SCS: 00727 src0 = src_vector(p, &inst->FullSrcRegisters[0]); 00728 tmp = i915_get_utemp(p); 00729 00730 /* 00731 * t0.xy = MUL x.xx11, x.x1111 ; x^2, x, 1, 1 00732 * t0 = MUL t0.xyxy t0.xx11 ; x^4, x^3, x^2, x 00733 * t1 = MUL t0.xyyw t0.yz11 ; x^7 x^5 x^3 x 00734 * scs.x = DP4 t1, sin_constants 00735 * t1 = MUL t0.xxz1 t0.z111 ; x^6 x^4 x^2 1 00736 * scs.y = DP4 t1, cos_constants 00737 */ 00738 i915_emit_arith(p, 00739 A0_MUL, 00740 tmp, A0_DEST_CHANNEL_XY, 0, 00741 swizzle(src0, X, X, ONE, ONE), 00742 swizzle(src0, X, ONE, ONE, ONE), 0); 00743 00744 i915_emit_arith(p, 00745 A0_MUL, 00746 tmp, A0_DEST_CHANNEL_ALL, 0, 00747 swizzle(tmp, X, Y, X, Y), 00748 swizzle(tmp, X, X, ONE, ONE), 0); 00749 00750 writemask = inst->FullDstRegisters[0].DstRegister.WriteMask; 00751 00752 if (writemask & TGSI_WRITEMASK_Y) { 00753 uint tmp1; 00754 00755 if (writemask & TGSI_WRITEMASK_X) 00756 tmp1 = i915_get_utemp(p); 00757 else 00758 tmp1 = tmp; 00759 00760 i915_emit_arith(p, 00761 
A0_MUL, 00762 tmp1, A0_DEST_CHANNEL_ALL, 0, 00763 swizzle(tmp, X, Y, Y, W), 00764 swizzle(tmp, X, Z, ONE, ONE), 0); 00765 00766 i915_emit_arith(p, 00767 A0_DP4, 00768 get_result_vector(p, &inst->FullDstRegisters[0]), 00769 A0_DEST_CHANNEL_Y, 0, 00770 swizzle(tmp1, W, Z, Y, X), 00771 i915_emit_const4fv(p, sin_constants), 0); 00772 } 00773 00774 if (writemask & TGSI_WRITEMASK_X) { 00775 i915_emit_arith(p, 00776 A0_MUL, 00777 tmp, A0_DEST_CHANNEL_XYZ, 0, 00778 swizzle(tmp, X, X, Z, ONE), 00779 swizzle(tmp, Z, ONE, ONE, ONE), 0); 00780 00781 i915_emit_arith(p, 00782 A0_DP4, 00783 get_result_vector(p, &inst->FullDstRegisters[0]), 00784 A0_DEST_CHANNEL_X, 0, 00785 swizzle(tmp, ONE, Z, Y, X), 00786 i915_emit_const4fv(p, cos_constants), 0); 00787 } 00788 break; 00789 00790 case TGSI_OPCODE_SGE: 00791 emit_simple_arith(p, inst, A0_SGE, 2); 00792 break; 00793 00794 case TGSI_OPCODE_SLE: 00795 /* like SGE, but swap reg0, reg1 */ 00796 emit_simple_arith_swap2(p, inst, A0_SGE, 2); 00797 break; 00798 00799 case TGSI_OPCODE_SIN: 00800 src0 = src_vector(p, &inst->FullSrcRegisters[0]); 00801 tmp = i915_get_utemp(p); 00802 00803 i915_emit_arith(p, 00804 A0_MUL, 00805 tmp, A0_DEST_CHANNEL_X, 0, 00806 src0, i915_emit_const1f(p, 1.0f / (float) (M_PI * 2.0)), 0); 00807 00808 i915_emit_arith(p, A0_MOD, tmp, A0_DEST_CHANNEL_X, 0, tmp, 0, 0); 00809 00810 /* By choosing different taylor constants, could get rid of this mul: 00811 */ 00812 i915_emit_arith(p, 00813 A0_MUL, 00814 tmp, A0_DEST_CHANNEL_X, 0, 00815 tmp, i915_emit_const1f(p, (float) (M_PI * 2.0)), 0); 00816 00817 /* 00818 * t0.xy = MUL x.xx11, x.x1111 ; x^2, x, 1, 1 00819 * t0 = MUL t0.xyxy t0.xx11 ; x^4, x^3, x^2, x 00820 * t1 = MUL t0.xyyw t0.yz11 ; x^7 x^5 x^3 x 00821 * result = DP4 t1.wzyx, sin_constants 00822 */ 00823 i915_emit_arith(p, 00824 A0_MUL, 00825 tmp, A0_DEST_CHANNEL_XY, 0, 00826 swizzle(tmp, X, X, ONE, ONE), 00827 swizzle(tmp, X, ONE, ONE, ONE), 0); 00828 00829 i915_emit_arith(p, 00830 A0_MUL, 00831 tmp, 
A0_DEST_CHANNEL_ALL, 0, 00832 swizzle(tmp, X, Y, X, Y), 00833 swizzle(tmp, X, X, ONE, ONE), 0); 00834 00835 i915_emit_arith(p, 00836 A0_MUL, 00837 tmp, A0_DEST_CHANNEL_ALL, 0, 00838 swizzle(tmp, X, Y, Y, W), 00839 swizzle(tmp, X, Z, ONE, ONE), 0); 00840 00841 i915_emit_arith(p, 00842 A0_DP4, 00843 get_result_vector(p, &inst->FullDstRegisters[0]), 00844 get_result_flags(inst), 0, 00845 swizzle(tmp, W, Z, Y, X), 00846 i915_emit_const4fv(p, sin_constants), 0); 00847 break; 00848 00849 case TGSI_OPCODE_SLT: 00850 emit_simple_arith(p, inst, A0_SLT, 2); 00851 break; 00852 00853 case TGSI_OPCODE_SGT: 00854 /* like SLT, but swap reg0, reg1 */ 00855 emit_simple_arith_swap2(p, inst, A0_SLT, 2); 00856 break; 00857 00858 case TGSI_OPCODE_SUB: 00859 src0 = src_vector(p, &inst->FullSrcRegisters[0]); 00860 src1 = src_vector(p, &inst->FullSrcRegisters[1]); 00861 00862 i915_emit_arith(p, 00863 A0_ADD, 00864 get_result_vector(p, &inst->FullDstRegisters[0]), 00865 get_result_flags(inst), 0, 00866 src0, negate(src1, 1, 1, 1, 1), 0); 00867 break; 00868 00869 case TGSI_OPCODE_TEX: 00870 emit_tex(p, inst, T0_TEXLD); 00871 break; 00872 00873 case TGSI_OPCODE_TXB: 00874 emit_tex(p, inst, T0_TEXLDB); 00875 break; 00876 00877 case TGSI_OPCODE_TXP: 00878 emit_tex(p, inst, T0_TEXLDP); 00879 break; 00880 00881 case TGSI_OPCODE_XPD: 00882 /* Cross product: 00883 * result.x = src0.y * src1.z - src0.z * src1.y; 00884 * result.y = src0.z * src1.x - src0.x * src1.z; 00885 * result.z = src0.x * src1.y - src0.y * src1.x; 00886 * result.w = undef; 00887 */ 00888 src0 = src_vector(p, &inst->FullSrcRegisters[0]); 00889 src1 = src_vector(p, &inst->FullSrcRegisters[1]); 00890 tmp = i915_get_utemp(p); 00891 00892 i915_emit_arith(p, 00893 A0_MUL, 00894 tmp, A0_DEST_CHANNEL_ALL, 0, 00895 swizzle(src0, Z, X, Y, ONE), 00896 swizzle(src1, Y, Z, X, ONE), 0); 00897 00898 i915_emit_arith(p, 00899 A0_MAD, 00900 get_result_vector(p, &inst->FullDstRegisters[0]), 00901 get_result_flags(inst), 0, 00902 swizzle(src0, Y, 
Z, X, ONE), 00903 swizzle(src1, Z, X, Y, ONE), 00904 negate(tmp, 1, 1, 1, 0)); 00905 break; 00906 00907 default: 00908 i915_program_error(p, "bad opcode %d", inst->Instruction.Opcode); 00909 p->error = 1; 00910 return; 00911 } 00912 00913 i915_release_utemps(p); 00914 } 00915 00916 00922 static void 00923 i915_translate_instructions(struct i915_fp_compile *p, 00924 const struct tgsi_token *tokens) 00925 { 00926 struct i915_fragment_shader *ifs = p->shader; 00927 struct tgsi_parse_context parse; 00928 00929 tgsi_parse_init( &parse, tokens ); 00930 00931 while( !tgsi_parse_end_of_tokens( &parse ) ) { 00932 00933 tgsi_parse_token( &parse ); 00934 00935 switch( parse.FullToken.Token.Type ) { 00936 case TGSI_TOKEN_TYPE_DECLARATION: 00937 if (parse.FullToken.FullDeclaration.Declaration.File 00938 == TGSI_FILE_CONSTANT) { 00939 uint i; 00940 for (i = parse.FullToken.FullDeclaration.DeclarationRange.First; 00941 i <= parse.FullToken.FullDeclaration.DeclarationRange.Last; 00942 i++) { 00943 assert(ifs->constant_flags[i] == 0x0); 00944 ifs->constant_flags[i] = I915_CONSTFLAG_USER; 00945 ifs->num_constants = MAX2(ifs->num_constants, i + 1); 00946 } 00947 } 00948 else if (parse.FullToken.FullDeclaration.Declaration.File 00949 == TGSI_FILE_TEMPORARY) { 00950 uint i; 00951 for (i = parse.FullToken.FullDeclaration.DeclarationRange.First; 00952 i <= parse.FullToken.FullDeclaration.DeclarationRange.Last; 00953 i++) { 00954 assert(i < I915_MAX_TEMPORARY); 00955 /* XXX just use shader->info->file_mask[TGSI_FILE_TEMPORARY] */ 00956 p->temp_flag |= (1 << i); /* mark temp as used */ 00957 } 00958 } 00959 break; 00960 00961 case TGSI_TOKEN_TYPE_IMMEDIATE: 00962 { 00963 const struct tgsi_full_immediate *imm 00964 = &parse.FullToken.FullImmediate; 00965 const uint pos = p->num_immediates++; 00966 uint j; 00967 for (j = 0; j < imm->Immediate.Size; j++) { 00968 p->immediates[pos][j] = imm->u.ImmediateFloat32[j].Float; 00969 } 00970 } 00971 break; 00972 00973 case TGSI_TOKEN_TYPE_INSTRUCTION: 
00974 if (p->first_instruction) { 00975 /* resolve location of immediates */ 00976 uint i, j; 00977 for (i = 0; i < p->num_immediates; i++) { 00978 /* find constant slot for this immediate */ 00979 for (j = 0; j < I915_MAX_CONSTANT; j++) { 00980 if (ifs->constant_flags[j] == 0x0) { 00981 memcpy(ifs->constants[j], 00982 p->immediates[i], 00983 4 * sizeof(float)); 00984 /*printf("immediate %d maps to const %d\n", i, j);*/ 00985 ifs->constant_flags[j] = 0xf; /* all four comps used */ 00986 p->immediates_map[i] = j; 00987 ifs->num_constants = MAX2(ifs->num_constants, j + 1); 00988 break; }
static void i915_translate_instructions | ( | struct i915_fp_compile * | p, | |
const struct tgsi_token * | tokens | |||
) | [static] |
Translate TGSI fragment shader into i915 hardware instructions.
p | the translation state | |
tokens | the TGSI token array |
Definition at line 997 of file i915_fpc_translate.c.
00999 : 01000 assert( 0 ); 01001 } 01002 01003 } /* while */ 01004 01005 tgsi_parse_free (&parse); 01006 } 01007 01008 01009 static struct i915_fp_compile * 01010 i915_init_compile(struct i915_context *i915, 01011 struct i915_fragment_shader *ifs) 01012 { 01013 struct i915_fp_compile *p = CALLOC_STRUCT(i915_fp_compile); 01014 01015 p->shader = ifs; 01016 01017 /* Put new constants at end of const buffer, growing downward. 01018 * The problem is we don't know how many user-defined constants might 01019 * be specified with pipe->set_constant_buffer(). 01020 * Should pre-scan the user's program to determine the highest-numbered 01021 * constant referenced. 01022 */ 01023 ifs->num_constants = 0; 01024 memset(ifs->constant_flags, 0, sizeof(ifs->constant_flags)); 01025 01026 p->first_instruction = TRUE; 01027 01028 p->nr_tex_indirect = 1; /* correct? */ 01029 p->nr_tex_insn = 0; 01030 p->nr_alu_insn = 0; 01031 p->nr_decl_insn = 0; 01032 01033 p->csr = p->program; 01034 p->decl = p->declarations; 01035 p->decl_s = 0; 01036 p->decl_t = 0; 01037 p->temp_flag = ~0x0 << I915_MAX_TEMPORARY; 01038 p->utemp_flag = ~0x7; 01039 01040 p->wpos_tex = -1; 01041 01042 /* initialize the first program word */ 01043 *(p->decl++) = _3DSTATE_PIXEL_SHADER_PROGRAM; 01044 01045 return p; 01046 } 01047 01048 01049 /* Copy compile results to the fragment program struct and destroy the 01050 * compilation context. 
01051 */ 01052 static void 01053 i915_fini_compile(struct i915_context *i915, struct i915_fp_compile *p) 01054 { 01055 struct i915_fragment_shader *ifs = p->shader; 01056 unsigned long program_size = (unsigned long) (p->csr - p->program); 01057 unsigned long decl_size = (unsigned long) (p->decl - p->declarations); 01058 01059 if (p->nr_tex_indirect > I915_MAX_TEX_INDIRECT) 01060 i915_program_error(p, "Exceeded max nr indirect texture lookups"); 01061 01062 if (p->nr_tex_insn > I915_MAX_TEX_INSN) 01063 i915_program_error(p, "Exceeded max TEX instructions"); 01064 01065 if (p->nr_alu_insn > I915_MAX_ALU_INSN) 01066 i915_program_error(p, "Exceeded max ALU instructions"); 01067 01068 if (p->nr_decl_insn > I915_MAX_DECL_INSN) 01069 i915_program_error(p, "Exceeded max DECL instructions"); 01070 01071 if (p->error) { 01072 p->NumNativeInstructions = 0; 01073 p->NumNativeAluInstructions = 0; 01074 p->NumNativeTexInstructions = 0; 01075 p->NumNativeTexIndirections = 0; 01076 01077 i915_use_passthrough_shader(ifs); 01078 } 01079 else { 01080 p->NumNativeInstructions = p->nr_alu_insn + p->nr_tex_insn + p->nr_decl_insn;
static void i915_use_passthrough_shader | ( | struct i915_fragment_shader * | fs | ) | [static] |
In the event of a translation failure, we'll generate a simple color pass-through program.
Definition at line 125 of file i915_fpc_translate.c.
00126 : "); 00127 va_start( args, msg ); 00128 util_vsnprintf( buffer, sizeof(buffer), msg, args ); 00129 va_end( args ); 00130 debug_printf(buffer); 00131 debug_printf("\n"); 00132 00133 p->error = 1;
static int negate | ( | int | reg, | |
int | x, | |||
int | y, | |||
int | z, | |||
int | w | |||
) | [static] |
component-wise negation of ureg
Definition at line 110 of file i915_fpc_translate.c.
References Elements, MALLOC, i915_fragment_shader::num_constants, passthrough, i915_fragment_shader::program, and i915_fragment_shader::program_len.
00110 { 00111 fs->program = (uint *) MALLOC(sizeof(passthrough)); 00112 if (fs->program) { 00113 memcpy(fs->program, passthrough, sizeof(passthrough)); 00114 fs->program_len = Elements(passthrough); 00115 } 00116 fs->num_constants = 0; 00117 }
static uint src_vector | ( | struct i915_fp_compile * | p, | |
const struct tgsi_full_src_register * | source | |||
) | [static] |
Construct a ureg for the given source register.
Will emit constants, apply swizzling and negation as needed.
Definition at line 159 of file i915_fpc_translate.c.
References assert, D0_CHANNEL_ALL, D0_CHANNEL_W, D0_CHANNEL_XYZ, debug_printf(), i915_emit_decl(), i915_program_error(), ONE, REG_TYPE_T, swizzle(), T_DIFFUSE, T_FOG_W, T_SPECULAR, T_TEX0, TGSI_SEMANTIC_COLOR, TGSI_SEMANTIC_FOG, TGSI_SEMANTIC_GENERIC, TGSI_SEMANTIC_POSITION, W, X, Y, and Z.
00171 { 00172 case TGSI_SEMANTIC_POSITION: 00173 debug_printf("SKIP SEM POS\n"); 00174 /* 00175 assert(p->wpos_tex != -1); 00176 src = i915_emit_decl(p, REG_TYPE_T, p->wpos_tex, D0_CHANNEL_ALL); 00177 */ 00178 break; 00179 case TGSI_SEMANTIC_COLOR: 00180 if (sem_ind == 0) { 00181 src = i915_emit_decl(p, REG_TYPE_T, T_DIFFUSE, D0_CHANNEL_ALL); 00182 } 00183 else { 00184 /* secondary color */ 00185 assert(sem_ind == 1); 00186 src = i915_emit_decl(p, REG_TYPE_T, T_SPECULAR, D0_CHANNEL_XYZ); 00187 src = swizzle(src, X, Y, Z, ONE); 00188 } 00189 break; 00190 case TGSI_SEMANTIC_FOG: 00191 src = i915_emit_decl(p, REG_TYPE_T, T_FOG_W, D0_CHANNEL_W); 00192 src = swizzle(src, W, W, W, W); 00193 break; 00194 case TGSI_SEMANTIC_GENERIC: 00195 /* usually a texcoord */ 00196 src = i915_emit_decl(p, REG_TYPE_T, T_TEX0 + sem_ind, D0_CHANNEL_ALL); 00197 break; 00198 default: 00199 i915_program_error(p, "Bad source->Index"); 00200 return 0; 00201 } 00202 break; 00203 00204 case TGSI_FILE_IMMEDIATE: 00205 assert(index < p->num_immediates); 00206 index = p->immediates_map[index]; 00207 /* fall-through */ 00208 case TGSI_FILE_CONSTANT: 00209 src = UREG(REG_TYPE_CONST, index); 00210 break; 00211 00212 default: 00213 i915_program_error(p, "Bad source->File"); 00214 return 0; 00215 } 00216 00217 if (source->SrcRegister.Extended) { 00218 src = swizzle(src, 00219 source->SrcRegisterExtSwz.ExtSwizzleX, 00220 source->SrcRegisterExtSwz.ExtSwizzleY, 00221 source->SrcRegisterExtSwz.ExtSwizzleZ, 00222 source->SrcRegisterExtSwz.ExtSwizzleW); 00223 } 00224 else { 00225 src = swizzle(src, 00226 source->SrcRegister.SwizzleX, 00227 source->SrcRegister.SwizzleY, 00228 source->SrcRegister.SwizzleZ, 00229 source->SrcRegister.SwizzleW); 00230 } 00231 00232 00233 /* There's both negate-all-components and per-component negation. 00234 * Try to handle both here. 
00235 */ 00236 { 00237 int nx = source->SrcRegisterExtSwz.NegateX; 00238 int ny = source->SrcRegisterExtSwz.NegateY; 00239 int nz = source->SrcRegisterExtSwz.NegateZ; 00240 int nw = source->SrcRegisterExtSwz.NegateW; 00241 if (source->SrcRegister.Negate) { 00242 nx = !nx; 00243 ny = !ny; 00244 nz = !nz; 00245 nw = !nw; 00246 } 00247 src = negate(src, nx, ny, nz, nw); 00248 } 00249 00250 /* no abs() or post-abs negation */ 00251 #if 0 00252 /* XXX assertions disabled to allow arbfplight.c to run */ 00253 /* XXX enable these assertions, or fix things */ 00254 assert(!source->SrcRegisterExtMod.Absolute); 00255 assert(!source->SrcRegisterExtMod.Negate); 00256 #endif 00257 return src; 00258 } 00259 00260 00264 static uint 00265 get_result_vector(struct i915_fp_compile *p, 00266 const struct tgsi_full_dst_register *dest) 00267 { 00268 switch (dest->DstRegister.File) { 00269 case TGSI_FILE_OUTPUT: 00270 { 00271 uint sem_name = p->shader->info.output_semantic_name[dest->DstRegister.Index]; 00272 switch (sem_name) { 00273 case TGSI_SEMANTIC_POSITION: 00274 return UREG(REG_TYPE_OD, 0); 00275 case TGSI_SEMANTIC_COLOR: 00276 return UREG(REG_TYPE_OC, 0); 00277 default: 00278 i915_program_error(p, "Bad inst->DstReg.Index/semantics"); 00279 return 0; 00280 } 00281 } 00282 case TGSI_FILE_TEMPORARY: 00283 return UREG(REG_TYPE_R, dest->DstRegister.Index); 00284 default: 00285 i915_program_error(p, "Bad inst->DstReg.File"); 00286 return 0; 00287 } 00288 } 00289 00290 00294 static uint 00295 get_result_flags(const struct tgsi_full_instruction *inst) 00296 { 00297 const uint writeMask 00298 = inst->FullDstRegisters[0].DstRegister.WriteMask; 00299 uint flags = 0x0; 00300 00301 if (inst->Instruction.Saturate == TGSI_SAT_ZERO_ONE) 00302 flags |= A0_DEST_SATURATE; 00303 00304 if (writeMask & TGSI_WRITEMASK_X) 00305 flags |= A0_DEST_CHANNEL_X; 00306 if (writeMask & TGSI_WRITEMASK_Y) 00307 flags |= A0_DEST_CHANNEL_Y; 00308 if (writeMask & TGSI_WRITEMASK_Z) 00309 flags |= A0_DEST_CHANNEL_Z; 
00310 if (writeMask & TGSI_WRITEMASK_W) 00311 flags |= A0_DEST_CHANNEL_W; 00312
static uint translate_tex_src_target | ( | struct i915_fp_compile * | p, | |
uint | tex | |||
) | [static] |
Convert TGSI_TEXTURE_x token to D0_SAMPLE_TYPE_x token.
Definition at line 382 of file i915_fpc_translate.c.
00397 { 00398 struct tgsi_full_instruction inst2; 00399
const float cos_constants[4] [static] |
Initial value:
{ 1.0, -1.0f / (2 * 1), 1.0f / (4 * 3 * 2 * 1), -1.0f / (6 * 5 * 4 * 3 * 2 * 1) }
Definition at line 98 of file i915_fpc_translate.c.
unsigned passthrough[] [static] |
Initial value:
{ ( (0x3<<29) |(0x1d<<24)|(0x5<<16)) | ((2*3)-1), ( (0x19<<24) | ( 1 << 19 ) | ( 8 << 14 ) | (0xf<<10) ), 0, 0, ( (0x2<<24) | ( 4 << 19 ) | (0xf<<10) | ( 1 << 7 ) | ( 8 << 2 )), 0x01230000, 0 }
Definition at line 49 of file i915_fpc_translate.c.
const float sin_constants[4] [static] |
Initial value:
{ 1.0, -1.0f / (3 * 2 * 1), 1.0f / (5 * 4 * 3 * 2 * 1), -1.0f / (7 * 6 * 5 * 4 * 3 * 2 * 1) }
Definition at line 91 of file i915_fpc_translate.c.