Go to the source code of this file.
Data Structures | |
struct | codegen |
Context needed during code generation. More... | |
Defines | |
#define | DISASSEM 01 |
Generate SPU fragment program/shader code. | |
Functions | |
static int | get_itemp (struct codegen *gen) |
Allocate an intermediate temporary register. | |
static void | free_itemps (struct codegen *gen) |
Free all intermediate temporary registers. | |
static int | get_const_one_reg (struct codegen *gen) |
Return index of an SPE register containing {1.0, 1.0, 1.0, 1.0}. | |
static int | get_src_reg (struct codegen *gen, int channel, const struct tgsi_full_src_register *src) |
Return the index of the SPU temporary containing the named TGSI source register. | |
static int | get_dst_reg (struct codegen *gen, int channel, const struct tgsi_full_dst_register *dest) |
Return the index of an SPE register to use for the given TGSI register. | |
static void | store_dest_reg (struct codegen *gen, int value_reg, int channel, const struct tgsi_full_dst_register *dest) |
When a TGSI instruction is writing to an output register, this function emits the SPE store instruction to store the value_reg. | |
static boolean | emit_MOV (struct codegen *gen, const struct tgsi_full_instruction *inst) |
static boolean | emit_ADD (struct codegen *gen, const struct tgsi_full_instruction *inst) |
Emit addition instructions. | |
static boolean | emit_MUL (struct codegen *gen, const struct tgsi_full_instruction *inst) |
Emit multiply. | |
static boolean | emit_SGT (struct codegen *gen, const struct tgsi_full_instruction *inst) |
Emit set-if-greater-than. | |
static boolean | emit_END (struct codegen *gen) |
Emit END instruction. | |
static boolean | emit_instruction (struct codegen *gen, const struct tgsi_full_instruction *inst) |
Emit code for the given instruction. | |
static void | emit_declaration (struct codegen *gen, const struct tgsi_full_declaration *decl) |
Emit "code" for a TGSI declaration. | |
boolean | cell_gen_fragment_program (struct cell_context *cell, const struct tgsi_token *tokens, struct spe_function *f) |
Translate TGSI shader code to SPE instructions. |
#define DISASSEM 01 |
Generate SPU fragment program/shader code.
Note that we generate SOA-style code here. So each TGSI instruction operates on four pixels (and is translated into four SPU instructions, generally speaking).
Definition at line 55 of file cell_gen_fp.c.
boolean cell_gen_fragment_program | ( | struct cell_context * | cell, | |
const struct tgsi_token * | tokens, | |||
struct spe_function * | f | |||
) |
Translate TGSI shader code to SPE instructions.
This is done when the state tracker gives us a new shader (via pipe->create_fs_state()).
cell | the rendering context (in) | |
tokens | the TGSI shader (in) | |
f | the generated function (out) |
Definition at line 455 of file cell_gen_fp.c.
References assert, codegen::constants_reg, emit_declaration(), emit_END(), emit_instruction(), codegen::error, codegen::f, tgsi_full_token::FullDeclaration, tgsi_full_token::FullImmediate, tgsi_full_token::FullInstruction, tgsi_parse_context::FullToken, codegen::inputs_reg, spe_function::num_inst, codegen::outputs_reg, spe_allocate_register(), spe_init_func(), SPE_INST_SIZE, SPU_MAX_FRAGMENT_PROGRAM_INSTS, tgsi_dump(), tgsi_parse_end_of_tokens(), tgsi_parse_free(), tgsi_parse_init(), tgsi_parse_token(), TGSI_TOKEN_TYPE_DECLARATION, TGSI_TOKEN_TYPE_IMMEDIATE, TGSI_TOKEN_TYPE_INSTRUCTION, tgsi_full_token::Token, and tgsi_token::Type.
00458 { 00459 struct tgsi_parse_context parse; 00460 struct codegen gen; 00461 00462 memset(&gen, 0, sizeof(gen)); 00463 gen.f = f; 00464 00465 /* For SPE function calls: reg $3 = first param, $4 = second param, etc. */ 00466 gen.inputs_reg = 3; /* pointer to inputs array */ 00467 gen.outputs_reg = 4; /* pointer to outputs array */ 00468 gen.constants_reg = 5; /* pointer to constants array */ 00469 00470 spe_init_func(f, SPU_MAX_FRAGMENT_PROGRAM_INSTS * SPE_INST_SIZE); 00471 spe_allocate_register(f, gen.inputs_reg); 00472 spe_allocate_register(f, gen.outputs_reg); 00473 spe_allocate_register(f, gen.constants_reg); 00474 00475 #if DISASSEM 00476 printf("Begin %s\n", __FUNCTION__); 00477 tgsi_dump(tokens, 0); 00478 #endif 00479 00480 tgsi_parse_init(&parse, tokens); 00481 00482 while (!tgsi_parse_end_of_tokens(&parse) && !gen.error) { 00483 tgsi_parse_token(&parse); 00484 00485 switch (parse.FullToken.Token.Type) { 00486 case TGSI_TOKEN_TYPE_IMMEDIATE: 00487 #if 0 00488 if (!note_immediate(&gen, &parse.FullToken.FullImmediate )) 00489 goto fail; 00490 #endif 00491 break; 00492 00493 case TGSI_TOKEN_TYPE_DECLARATION: 00494 emit_declaration(&gen, &parse.FullToken.FullDeclaration); 00495 break; 00496 00497 case TGSI_TOKEN_TYPE_INSTRUCTION: 00498 if (!emit_instruction(&gen, &parse.FullToken.FullInstruction )) { 00499 gen.error = true; 00500 } 00501 break; 00502 00503 default: 00504 assert(0); 00505 00506 } 00507 } 00508 00509 00510 if (gen.error) { 00511 /* terminate the SPE code */ 00512 return emit_END(&gen); 00513 } 00514 00515 #if DISASSEM 00516 printf("cell_gen_fragment_program nr instructions: %d\n", f->num_inst); 00517 printf("End %s\n", __FUNCTION__); 00518 #endif 00519 00520 tgsi_parse_free( &parse ); 00521 00522 return !gen.error; 00523 }
static boolean emit_ADD | ( | struct codegen * | gen, | |
const struct tgsi_full_instruction * | inst | |||
) | [static] |
Emit addition instructions.
Recall that a single TGSI_OPCODE_ADD becomes (up to) four SPU "fa" instructions because we're doing SOA processing.
Definition at line 262 of file cell_gen_fp.c.
References tgsi_full_dst_register::DstRegister, codegen::f, free_itemps(), tgsi_full_instruction::FullDstRegisters, tgsi_full_instruction::FullSrcRegisters, get_dst_reg(), get_src_reg(), spe_fa(), store_dest_reg(), and tgsi_dst_register::WriteMask.
00263 { 00264 int ch; 00265 /* Loop over Red/Green/Blue/Alpha channels */ 00266 for (ch = 0; ch < 4; ch++) { 00267 /* If the dest R, G, B or A writemask is enabled... */ 00268 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { 00269 /* get indexes of the two src, one dest SPE registers */ 00270 int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); 00271 int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]); 00272 int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); 00273 00274 /* Emit actual SPE instruction: d = s1 + s2 */ 00275 spe_fa(gen->f, d_reg, s1_reg, s2_reg); 00276 #if DISASSEM 00277 printf("fa\tr%d, r%d, r%d\n", d_reg, s1_reg, s2_reg); 00278 #endif 00279 00280 /* Store the result (a no-op for TGSI_FILE_TEMPORARY dests) */ 00281 store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]); 00282 /* Free any intermediate temps we allocated */ 00283 free_itemps(gen); 00284 } 00285 } 00286 return true; 00287 }
static void emit_declaration | ( | struct codegen * | gen, | |
const struct tgsi_full_declaration * | decl | |||
) | [static] |
Emit "code" for a TGSI declaration.
We only care about TGSI TEMPORARY register declarations at this time. For each TGSI TEMPORARY we allocate four SPE registers.
Definition at line 409 of file cell_gen_fp.c.
References tgsi_full_declaration::Declaration, tgsi_full_declaration::DeclarationRange, codegen::f, tgsi_declaration::File, tgsi_declaration_range::First, tgsi_declaration_range::Last, spe_allocate_available_register(), codegen::temp_regs, and TGSI_FILE_TEMPORARY.
00410 { 00411 int i, ch; 00412 00413 switch (decl->Declaration.File) { 00414 case TGSI_FILE_TEMPORARY: 00415 #if DISASSEM 00416 printf("Declare temp reg %d .. %d\n", 00417 decl->DeclarationRange.First, 00418 decl->DeclarationRange.Last); 00419 #endif 00420 for (i = decl->DeclarationRange.First; 00421 i <= decl->DeclarationRange.Last; 00422 i++) { 00423 for (ch = 0; ch < 4; ch++) { 00424 gen->temp_regs[i][ch] = spe_allocate_available_register(gen->f); 00425 } 00426 00427 /* XXX if we run out of SPE registers, we need to spill 00428 * to SPU memory. someday... 00429 */ 00430 00431 #if DISASSEM 00432 printf(" SPE regs: %d %d %d %d\n", 00433 gen->temp_regs[i][0], 00434 gen->temp_regs[i][1], 00435 gen->temp_regs[i][2], 00436 gen->temp_regs[i][3]); 00437 #endif 00438 } 00439 break; 00440 default: 00441 ; /* ignore */ 00442 } 00443 }
Emit END instruction.
We just return from the shader function at this point.
Note that there may be more code after this that would be called by TGSI_OPCODE_CALL.
Definition at line 362 of file cell_gen_fp.c.
References codegen::f, spe_bi(), and SPE_REG_RA.
00363 { 00364 /* return from function call */ 00365 spe_bi(gen->f, SPE_REG_RA, 0, 0); 00366 #if DISASSEM 00367 printf("bi\trRA\n"); 00368 #endif 00369 return true; 00370 }
static boolean emit_instruction | ( | struct codegen * | gen, | |
const struct tgsi_full_instruction * | inst | |||
) | [static] |
Emit code for the given instruction.
Just a big switch stmt.
Definition at line 377 of file cell_gen_fp.c.
References emit_ADD(), emit_END(), emit_MOV(), emit_MUL(), emit_SGT(), tgsi_full_instruction::Instruction, tgsi_instruction::Opcode, TGSI_OPCODE_ADD, TGSI_OPCODE_END, TGSI_OPCODE_MOV, TGSI_OPCODE_MUL, and TGSI_OPCODE_SGT.
00379 { 00380 switch (inst->Instruction.Opcode) { 00381 case TGSI_OPCODE_MOV: 00382 return emit_MOV(gen, inst); 00383 case TGSI_OPCODE_MUL: 00384 return emit_MUL(gen, inst); 00385 case TGSI_OPCODE_ADD: 00386 return emit_ADD(gen, inst); 00387 case TGSI_OPCODE_SGT: 00388 return emit_SGT(gen, inst); 00389 case TGSI_OPCODE_END: 00390 return emit_END(gen); 00391 00392 /* XXX lots more cases to do... */ 00393 00394 default: 00395 return false; 00396 } 00397 00398 return true; 00399 }
static boolean emit_MOV | ( | struct codegen * | gen, | |
const struct tgsi_full_instruction * | inst | |||
) | [static] |
Definition at line 236 of file cell_gen_fp.c.
References tgsi_full_dst_register::DstRegister, codegen::f, free_itemps(), tgsi_full_instruction::FullDstRegisters, tgsi_full_instruction::FullSrcRegisters, get_dst_reg(), get_src_reg(), spe_move(), store_dest_reg(), and tgsi_dst_register::WriteMask.
00237 { 00238 int ch; 00239 for (ch = 0; ch < 4; ch++) { 00240 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { 00241 int src_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); 00242 int dst_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); 00243 /* XXX we don't always need to actually emit a mov instruction here */ 00244 spe_move(gen->f, dst_reg, src_reg); 00245 #if DISASSEM 00246 printf("mov\tr%d, r%d\n", dst_reg, src_reg); 00247 #endif 00248 store_dest_reg(gen, dst_reg, ch, &inst->FullDstRegisters[0]); 00249 free_itemps(gen); 00250 } 00251 } 00252 return true; 00253 }
static boolean emit_MUL | ( | struct codegen * | gen, | |
const struct tgsi_full_instruction * | inst | |||
) | [static] |
Emit multiply.
See emit_ADD for comments.
Definition at line 294 of file cell_gen_fp.c.
References tgsi_full_dst_register::DstRegister, codegen::f, free_itemps(), tgsi_full_instruction::FullDstRegisters, tgsi_full_instruction::FullSrcRegisters, get_dst_reg(), get_src_reg(), spe_fm(), store_dest_reg(), and tgsi_dst_register::WriteMask.
00295 { 00296 int ch; 00297 for (ch = 0; ch < 4; ch++) { 00298 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { 00299 int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); 00300 int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]); 00301 int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); 00302 /* d = s1 * s2 */ 00303 spe_fm(gen->f, d_reg, s1_reg, s2_reg); 00304 #if DISASSEM 00305 printf("fm\tr%d, r%d, r%d\n", d_reg, s1_reg, s2_reg); 00306 #endif 00307 store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]); 00308 free_itemps(gen); 00309 } 00310 } 00311 return true; 00312 }
static boolean emit_SGT | ( | struct codegen * | gen, | |
const struct tgsi_full_instruction * | inst | |||
) | [static] |
Emit set-if-greater-than.
Note that the SPE fcgt instruction produces 0x0 and 0xffffffff as the result but OpenGL/TGSI needs 0.0 and 1.0 results. We can easily convert 0x0/0xffffffff to 0.0/1.0 with a bitwise AND.
Definition at line 322 of file cell_gen_fp.c.
References tgsi_full_dst_register::DstRegister, codegen::f, free_itemps(), tgsi_full_instruction::FullDstRegisters, tgsi_full_instruction::FullSrcRegisters, get_const_one_reg(), get_dst_reg(), get_src_reg(), spe_and(), spe_fcgt(), store_dest_reg(), and tgsi_dst_register::WriteMask.
00323 { 00324 int ch; 00325 00326 for (ch = 0; ch < 4; ch++) { 00327 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { 00328 int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); 00329 int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]); 00330 int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); 00331 00332 /* d = (s1 > s2) */ 00333 spe_fcgt(gen->f, d_reg, s1_reg, s2_reg); 00334 #if DISASSEM 00335 printf("fcgt\tr%d, r%d, r%d\n", d_reg, s1_reg, s2_reg); 00336 #endif 00337 00338 /* convert d from 0x0/0xffffffff to 0.0/1.0 */ 00339 /* d = d & one_reg */ 00340 spe_and(gen->f, d_reg, d_reg, get_const_one_reg(gen)); 00341 #if DISASSEM 00342 printf("and\tr%d, r%d, r%d\n", d_reg, d_reg, get_const_one_reg(gen)); 00343 #endif 00344 00345 store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]); 00346 free_itemps(gen); 00347 } 00348 } 00349 00350 return true; 00351 }
static void free_itemps | ( | struct codegen * | gen | ) | [static] |
Free all intermediate temporary registers.
To be called after each instruction has been emitted.
Definition at line 96 of file cell_gen_fp.c.
References codegen::f, codegen::itemps, codegen::num_itemps, and spe_release_register().
00097 { 00098 int i; 00099 for (i = 0; i < gen->num_itemps; i++) { 00100 spe_release_register(gen->f, gen->itemps[i]); 00101 } 00102 gen->num_itemps = 0; 00103 }
static int get_const_one_reg | ( | struct codegen * | gen | ) | [static] |
Return index of an SPE register containing {1.0, 1.0, 1.0, 1.0}.
The register is allocated and initialized upon the first call.
Definition at line 111 of file cell_gen_fp.c.
References codegen::f, codegen::one_reg, spe_allocate_available_register(), and spe_load_float().
00112 { 00113 if (gen->one_reg <= 0) { 00114 gen->one_reg = spe_allocate_available_register(gen->f); 00115 } 00116 00117 /* one = {1.0, 1.0, 1.0, 1.0} */ 00118 spe_load_float(gen->f, gen->one_reg, 1.0f); 00119 #if DISASSEM 00120 printf("il\tr%d, 1.0f\n", gen->one_reg); 00121 #endif 00122 00123 return gen->one_reg; 00124 }
static int get_dst_reg | ( | struct codegen * | gen, | |
int | channel, | |||
const struct tgsi_full_dst_register * | dest | |||
) | [static] |
Return the index of an SPE register to use for the given TGSI register.
If the TGSI register is TGSI_FILE_TEMPORARY, the index of the corresponding SPE register is returned. If the TGSI register is TGSI_FILE_OUTPUT we allocate an intermediate temporary register. See store_dest_reg() below...
Definition at line 182 of file cell_gen_fp.c.
References assert, tgsi_full_dst_register::DstRegister, tgsi_dst_register::File, get_itemp(), tgsi_dst_register::Index, codegen::temp_regs, TGSI_FILE_OUTPUT, and TGSI_FILE_TEMPORARY.
00185 { 00186 int reg; 00187 00188 switch (dest->DstRegister.File) { 00189 case TGSI_FILE_TEMPORARY: 00190 reg = gen->temp_regs[dest->DstRegister.Index][channel]; 00191 break; 00192 case TGSI_FILE_OUTPUT: 00193 reg = get_itemp(gen); 00194 break; 00195 default: 00196 assert(0); 00197 } 00198 00199 return reg; 00200 }
static int get_itemp | ( | struct codegen * | gen | ) | [static] |
Allocate an intermediate temporary register.
Definition at line 83 of file cell_gen_fp.c.
References assert, Elements, codegen::f, codegen::itemps, codegen::num_itemps, and spe_allocate_available_register().
00084 { 00085 int t = spe_allocate_available_register(gen->f); 00086 assert(gen->num_itemps < Elements(gen->itemps)); 00087 gen->itemps[gen->num_itemps++] = t; 00088 return t; 00089 }
static int get_src_reg | ( | struct codegen * | gen, | |
int | channel, | |||
const struct tgsi_full_src_register * | src | |||
) | [static] |
Return the index of the SPU temporary containing the named TGSI source register.
If the TGSI register is a TGSI_FILE_TEMPORARY we just return the corresponding SPE register. If the TGSI register is TGSI_FILE_INPUT/CONSTANT/IMMEDIATE we allocate a new SPE register and emit an SPE load instruction.
Definition at line 135 of file cell_gen_fp.c.
References assert, codegen::f, tgsi_src_register::File, get_itemp(), tgsi_src_register::Index, codegen::inputs_reg, offset(), spe_lqd(), tgsi_full_src_register::SrcRegister, codegen::temp_regs, TGSI_FILE_CONSTANT, TGSI_FILE_IMMEDIATE, TGSI_FILE_INPUT, and TGSI_FILE_TEMPORARY.
00138 { 00139 int reg; 00140 00141 /* XXX need to examine src swizzle info here. 00142 * That will involve changing the channel var... 00143 */ 00144 00145 00146 switch (src->SrcRegister.File) { 00147 case TGSI_FILE_TEMPORARY: 00148 reg = gen->temp_regs[src->SrcRegister.Index][channel]; 00149 break; 00150 case TGSI_FILE_INPUT: 00151 { 00152 /* offset is measured in quadwords, not bytes */ 00153 int offset = src->SrcRegister.Index * 4 + channel; 00154 reg = get_itemp(gen); 00155 /* Load: reg = memory[(machine_reg) + offset] */ 00156 spe_lqd(gen->f, reg, gen->inputs_reg, offset); 00157 #if DISASSEM 00158 printf("lqd\tr%d, r%d + %d\n", reg, gen->inputs_reg, offset); 00159 #endif 00160 } 00161 break; 00162 case TGSI_FILE_IMMEDIATE: 00163 /* xxx fall-through for now / fix */ 00164 case TGSI_FILE_CONSTANT: 00165 /* xxx fall-through for now / fix */ 00166 default: 00167 assert(0); 00168 } 00169 00170 return reg; 00171 }
static void store_dest_reg | ( | struct codegen * | gen, | |
int | value_reg, | |||
int | channel, | |||
const struct tgsi_full_dst_register * | dest | |||
) | [static] |
When a TGSI instruction is writing to an output register, this function emits the SPE store instruction to store the value_reg.
value_reg | the SPE register containing the value to store. This would have been returned by get_dst_reg(). |
Definition at line 210 of file cell_gen_fp.c.
References assert, tgsi_full_dst_register::DstRegister, codegen::f, tgsi_dst_register::File, tgsi_dst_register::Index, offset(), codegen::outputs_reg, spe_stqd(), TGSI_FILE_OUTPUT, and TGSI_FILE_TEMPORARY.
00213 { 00214 switch (dest->DstRegister.File) { 00215 case TGSI_FILE_TEMPORARY: 00216 /* no-op */ 00217 break; 00218 case TGSI_FILE_OUTPUT: 00219 { 00220 /* offset is measured in quadwords, not bytes */ 00221 int offset = dest->DstRegister.Index * 4 + channel; 00222 /* Store: memory[(machine_reg) + offset] = reg */ 00223 spe_stqd(gen->f, value_reg, gen->outputs_reg, offset); 00224 #if DISASSEM 00225 printf("stqd\tr%d, r%d + %d\n", value_reg, gen->outputs_reg, offset); 00226 #endif 00227 } 00228 break; 00229 default: 00230 assert(0); 00231 } 00232 }