cell_gen_fp.c File Reference

Include dependency graph for cell_gen_fp.c:

Go to the source code of this file.

Data Structures

struct  codegen
 Context needed during code generation. More...

Defines

#define DISASSEM   01
 Generate SPU fragment program/shader code.

Functions

static int get_itemp (struct codegen *gen)
 Allocate an intermediate temporary register.
static void free_itemps (struct codegen *gen)
 Free all intermediate temporary registers.
static int get_const_one_reg (struct codegen *gen)
 Return index of an SPE register containing {1.0, 1.0, 1.0, 1.0}.
static int get_src_reg (struct codegen *gen, int channel, const struct tgsi_full_src_register *src)
 Return the index of the SPU temporary containing the named TGSI source register.
static int get_dst_reg (struct codegen *gen, int channel, const struct tgsi_full_dst_register *dest)
 Return the index of an SPE register to use for the given TGSI register.
static void store_dest_reg (struct codegen *gen, int value_reg, int channel, const struct tgsi_full_dst_register *dest)
 When a TGSI instruction is writing to an output register, this function emits the SPE store instruction to store the value_reg.
static boolean emit_MOV (struct codegen *gen, const struct tgsi_full_instruction *inst)
static boolean emit_ADD (struct codegen *gen, const struct tgsi_full_instruction *inst)
 Emit addition instructions.
static boolean emit_MUL (struct codegen *gen, const struct tgsi_full_instruction *inst)
 Emit multiply.
static boolean emit_SGT (struct codegen *gen, const struct tgsi_full_instruction *inst)
 Emit set-if-greater-than.
static boolean emit_END (struct codegen *gen)
 Emit END instruction.
static boolean emit_instruction (struct codegen *gen, const struct tgsi_full_instruction *inst)
 Emit code for the given instruction.
static void emit_declaration (struct codegen *gen, const struct tgsi_full_declaration *decl)
 Emit "code" for a TGSI declaration.
boolean cell_gen_fragment_program (struct cell_context *cell, const struct tgsi_token *tokens, struct spe_function *f)
 Translate TGSI shader code to SPE instructions.


Define Documentation

#define DISASSEM   01

Generate SPU fragment program/shader code.

Note that we generate SOA-style code here. So each TGSI instruction operates on four pixels (and is translated into four SPU instructions, generally speaking).

Author:
Brian Paul

Set to 1 to enable debug/disassembly printfs.

Definition at line 55 of file cell_gen_fp.c.


Function Documentation

boolean cell_gen_fragment_program ( struct cell_context * cell,
const struct tgsi_token * tokens,
struct spe_function * f 
)

Translate TGSI shader code to SPE instructions.

This is done when the state tracker gives us a new shader (via pipe->create_fs_state()).

Parameters:
cell the rendering context (in)
tokens the TGSI shader (in)
f the generated function (out)

Definition at line 455 of file cell_gen_fp.c.

References assert, codegen::constants_reg, emit_declaration(), emit_END(), emit_instruction(), codegen::error, codegen::f, tgsi_full_token::FullDeclaration, tgsi_full_token::FullImmediate, tgsi_full_token::FullInstruction, tgsi_parse_context::FullToken, codegen::inputs_reg, spe_function::num_inst, codegen::outputs_reg, spe_allocate_register(), spe_init_func(), SPE_INST_SIZE, SPU_MAX_FRAGMENT_PROGRAM_INSTS, tgsi_dump(), tgsi_parse_end_of_tokens(), tgsi_parse_free(), tgsi_parse_init(), tgsi_parse_token(), TGSI_TOKEN_TYPE_DECLARATION, TGSI_TOKEN_TYPE_IMMEDIATE, TGSI_TOKEN_TYPE_INSTRUCTION, tgsi_full_token::Token, and tgsi_token::Type.

00458 {
00459    struct tgsi_parse_context parse;
00460    struct codegen gen;
00461 
00462    memset(&gen, 0, sizeof(gen));
00463    gen.f = f;
00464 
00465    /* For SPE function calls: reg $3 = first param, $4 = second param, etc. */
00466    gen.inputs_reg = 3;     /* pointer to inputs array */
00467    gen.outputs_reg = 4;    /* pointer to outputs array */
00468    gen.constants_reg = 5;  /* pointer to constants array */
00469 
00470    spe_init_func(f, SPU_MAX_FRAGMENT_PROGRAM_INSTS * SPE_INST_SIZE);
00471    spe_allocate_register(f, gen.inputs_reg);
00472    spe_allocate_register(f, gen.outputs_reg);
00473    spe_allocate_register(f, gen.constants_reg);
00474 
00475 #if DISASSEM
00476    printf("Begin %s\n", __FUNCTION__);
00477    tgsi_dump(tokens, 0);
00478 #endif
00479 
00480    tgsi_parse_init(&parse, tokens);
00481 
00482    while (!tgsi_parse_end_of_tokens(&parse) && !gen.error) {
00483       tgsi_parse_token(&parse);
00484 
00485       switch (parse.FullToken.Token.Type) {
00486       case TGSI_TOKEN_TYPE_IMMEDIATE:
00487 #if 0
00488          if (!note_immediate(&gen, &parse.FullToken.FullImmediate ))
00489             goto fail;
00490 #endif
00491          break;
00492 
00493       case TGSI_TOKEN_TYPE_DECLARATION:
00494          emit_declaration(&gen, &parse.FullToken.FullDeclaration);
00495          break;
00496 
00497       case TGSI_TOKEN_TYPE_INSTRUCTION:
00498          if (!emit_instruction(&gen, &parse.FullToken.FullInstruction )) {
00499             gen.error = true;
00500          }
00501          break;
00502 
00503       default:
00504          assert(0);
00505 
00506       }
00507    }
00508 
00509 
00510    if (gen.error) {
00511       /* terminate the SPE code */
00512       return emit_END(&gen);
00513    }
00514 
00515 #if DISASSEM
00516    printf("cell_gen_fragment_program nr instructions: %d\n", f->num_inst);
00517    printf("End %s\n", __FUNCTION__);
00518 #endif
00519 
00520    tgsi_parse_free( &parse );
00521 
00522    return !gen.error;
00523 }

static boolean emit_ADD ( struct codegen * gen,
const struct tgsi_full_instruction * inst 
) [static]

Emit addition instructions.

Recall that a single TGSI_OPCODE_ADD becomes (up to) four SPU "fa" instructions because we're doing SOA processing.

Definition at line 262 of file cell_gen_fp.c.

References tgsi_full_dst_register::DstRegister, codegen::f, free_itemps(), tgsi_full_instruction::FullDstRegisters, tgsi_full_instruction::FullSrcRegisters, get_dst_reg(), get_src_reg(), spe_fa(), store_dest_reg(), and tgsi_dst_register::WriteMask.

00263 {
00264    int ch;
00265    /* Loop over Red/Green/Blue/Alpha channels */
00266    for (ch = 0; ch < 4; ch++) {
00267       /* If the dest R, G, B or A writemask is enabled... */
00268       if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
00269          /* get indexes of the two src, one dest SPE registers */
00270          int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
00271          int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
00272          int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
00273 
00274          /* Emit actual SPE instruction: d = s1 + s2 */
00275          spe_fa(gen->f, d_reg, s1_reg, s2_reg);
00276 #if DISASSEM
00277          printf("fa\tr%d, r%d, r%d\n", d_reg, s1_reg, s2_reg);
00278 #endif
00279 
00280          /* Store the result (a no-op for TGSI_FILE_TEMPORARY dests) */
00281          store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
00282          /* Free any intermediate temps we allocated */
00283          free_itemps(gen);
00284       }
00285    }
00286    return true;
00287 }

static void emit_declaration ( struct codegen * gen,
const struct tgsi_full_declaration * decl 
) [static]

Emit "code" for a TGSI declaration.

We only care about TGSI TEMPORARY register declarations at this time. For each TGSI TEMPORARY we allocate four SPE registers.

Definition at line 409 of file cell_gen_fp.c.

References tgsi_full_declaration::Declaration, tgsi_full_declaration::DeclarationRange, codegen::f, tgsi_declaration::File, tgsi_declaration_range::First, tgsi_declaration_range::Last, spe_allocate_available_register(), codegen::temp_regs, and TGSI_FILE_TEMPORARY.

00410 {
00411    int i, ch;
00412 
00413    switch (decl->Declaration.File) {
00414    case TGSI_FILE_TEMPORARY:
00415 #if DISASSEM
00416       printf("Declare temp reg %d .. %d\n",
00417              decl->DeclarationRange.First,
00418              decl->DeclarationRange.Last);
00419 #endif
00420       for (i = decl->DeclarationRange.First;
00421            i <= decl->DeclarationRange.Last;
00422            i++) {
00423          for (ch = 0; ch < 4; ch++) {
00424             gen->temp_regs[i][ch] = spe_allocate_available_register(gen->f);
00425          }
00426 
00427          /* XXX if we run out of SPE registers, we need to spill
00428           * to SPU memory.  someday...
00429           */
00430 
00431 #if DISASSEM
00432          printf("  SPE regs: %d %d %d %d\n",
00433                 gen->temp_regs[i][0],
00434                 gen->temp_regs[i][1],
00435                 gen->temp_regs[i][2],
00436                 gen->temp_regs[i][3]);
00437 #endif
00438       }
00439       break;
00440    default:
00441       ; /* ignore */
00442    }
00443 }

static boolean emit_END ( struct codegen * gen  )  [static]

Emit END instruction.

We just return from the shader function at this point.

Note that there may be more code after this that would be called by TGSI_OPCODE_CALL.

Definition at line 362 of file cell_gen_fp.c.

References codegen::f, spe_bi(), and SPE_REG_RA.

00363 {
00364    /* return from function call */
00365    spe_bi(gen->f, SPE_REG_RA, 0, 0);
00366 #if DISASSEM
00367    printf("bi\trRA\n");
00368 #endif
00369    return true;
00370 }

static boolean emit_instruction ( struct codegen * gen,
const struct tgsi_full_instruction * inst 
) [static]

Emit code for the given instruction.

Just a big switch stmt.

Definition at line 377 of file cell_gen_fp.c.

References emit_ADD(), emit_END(), emit_MOV(), emit_MUL(), emit_SGT(), tgsi_full_instruction::Instruction, tgsi_instruction::Opcode, TGSI_OPCODE_ADD, TGSI_OPCODE_END, TGSI_OPCODE_MOV, TGSI_OPCODE_MUL, and TGSI_OPCODE_SGT.

00379 {
00380    switch (inst->Instruction.Opcode) {
00381    case TGSI_OPCODE_MOV:
00382       return emit_MOV(gen, inst);
00383    case TGSI_OPCODE_MUL:
00384       return emit_MUL(gen, inst);
00385    case TGSI_OPCODE_ADD:
00386       return emit_ADD(gen, inst);
00387    case TGSI_OPCODE_SGT:
00388       return emit_SGT(gen, inst);
00389    case TGSI_OPCODE_END:
00390       return emit_END(gen);
00391 
00392    /* XXX lots more cases to do... */
00393 
00394    default:
00395       return false;
00396    }
00397 
00398    return true;
00399 }

static boolean emit_MOV ( struct codegen * gen,
const struct tgsi_full_instruction * inst 
) [static]

Definition at line 236 of file cell_gen_fp.c.

References tgsi_full_dst_register::DstRegister, codegen::f, free_itemps(), tgsi_full_instruction::FullDstRegisters, tgsi_full_instruction::FullSrcRegisters, get_dst_reg(), get_src_reg(), spe_move(), store_dest_reg(), and tgsi_dst_register::WriteMask.

00237 {
00238    int ch;
00239    for (ch = 0; ch < 4; ch++) {
00240       if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
00241          int src_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
00242          int dst_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
00243          /* XXX we don't always need to actually emit a mov instruction here */
00244          spe_move(gen->f, dst_reg, src_reg);
00245 #if DISASSEM
00246          printf("mov\tr%d, r%d\n", dst_reg, src_reg);
00247 #endif
00248          store_dest_reg(gen, dst_reg, ch, &inst->FullDstRegisters[0]);
00249          free_itemps(gen);
00250       }
00251    }
00252    return true;
00253 }

static boolean emit_MUL ( struct codegen * gen,
const struct tgsi_full_instruction * inst 
) [static]

Emit multiply.

See emit_ADD for comments.

Definition at line 294 of file cell_gen_fp.c.

References tgsi_full_dst_register::DstRegister, codegen::f, free_itemps(), tgsi_full_instruction::FullDstRegisters, tgsi_full_instruction::FullSrcRegisters, get_dst_reg(), get_src_reg(), spe_fm(), store_dest_reg(), and tgsi_dst_register::WriteMask.

00295 {
00296    int ch;
00297    for (ch = 0; ch < 4; ch++) {
00298       if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
00299          int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
00300          int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
00301          int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
00302          /* d = s1 * s2 */
00303          spe_fm(gen->f, d_reg, s1_reg, s2_reg);
00304 #if DISASSEM
00305          printf("fm\tr%d, r%d, r%d\n", d_reg, s1_reg, s2_reg);
00306 #endif
00307          store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
00308          free_itemps(gen);
00309       }
00310    }
00311    return true;
00312 }

static boolean emit_SGT ( struct codegen * gen,
const struct tgsi_full_instruction * inst 
) [static]

Emit set-if-greater-than.

Note that the SPE fcgt instruction produces 0x0 and 0xffffffff as the result but OpenGL/TGSI needs 0.0 and 1.0 results. We can easily convert 0x0/0xffffffff to 0.0/1.0 with a bitwise AND.

Definition at line 322 of file cell_gen_fp.c.

References tgsi_full_dst_register::DstRegister, codegen::f, free_itemps(), tgsi_full_instruction::FullDstRegisters, tgsi_full_instruction::FullSrcRegisters, get_const_one_reg(), get_dst_reg(), get_src_reg(), spe_and(), spe_fcgt(), store_dest_reg(), and tgsi_dst_register::WriteMask.

00323 {
00324    int ch;
00325 
00326    for (ch = 0; ch < 4; ch++) {
00327       if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
00328          int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
00329          int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
00330          int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
00331 
00332          /* d = (s1 > s2) */
00333          spe_fcgt(gen->f, d_reg, s1_reg, s2_reg);
00334 #if DISASSEM
00335          printf("fcgt\tr%d, r%d, r%d\n", d_reg, s1_reg, s2_reg);
00336 #endif
00337 
00338          /* convert d from 0x0/0xffffffff to 0.0/1.0 */
00339          /* d = d & one_reg */
00340          spe_and(gen->f, d_reg, d_reg, get_const_one_reg(gen));
00341 #if DISASSEM
00342          printf("and\tr%d, r%d, r%d\n", d_reg, d_reg, get_const_one_reg(gen));
00343 #endif
00344 
00345          store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
00346          free_itemps(gen);
00347       }
00348    }
00349 
00350    return true;
00351 }

static void free_itemps ( struct codegen * gen  )  [static]

Free all intermediate temporary registers.

To be called after each instruction has been emitted.

Definition at line 96 of file cell_gen_fp.c.

References codegen::f, codegen::itemps, codegen::num_itemps, and spe_release_register().

00097 {
00098    int i;
00099    for (i = 0; i < gen->num_itemps; i++) {
00100       spe_release_register(gen->f, gen->itemps[i]);
00101    }
00102    gen->num_itemps = 0;
00103 }

static int get_const_one_reg ( struct codegen * gen  )  [static]

Return index of an SPE register containing {1.0, 1.0, 1.0, 1.0}.

The register is allocated and initialized upon the first call.

Definition at line 111 of file cell_gen_fp.c.

References codegen::f, codegen::one_reg, spe_allocate_available_register(), and spe_load_float().

00112 {
00113    if (gen->one_reg <= 0) {
00114       gen->one_reg = spe_allocate_available_register(gen->f);
00115    }
00116 
00117    /* one = {1.0, 1.0, 1.0, 1.0} */
00118    spe_load_float(gen->f, gen->one_reg, 1.0f);
00119 #if DISASSEM
00120    printf("il\tr%d, 1.0f\n", gen->one_reg);
00121 #endif
00122 
00123    return gen->one_reg;
00124 }

static int get_dst_reg ( struct codegen * gen,
int  channel,
const struct tgsi_full_dst_register * dest 
) [static]

Return the index of an SPE register to use for the given TGSI register.

If the TGSI register is TGSI_FILE_TEMPORARY, the index of the corresponding SPE register is returned. If the TGSI register is TGSI_FILE_OUTPUT we allocate an intermediate temporary register. See store_dest_reg() below...

Definition at line 182 of file cell_gen_fp.c.

References assert, tgsi_full_dst_register::DstRegister, tgsi_dst_register::File, get_itemp(), tgsi_dst_register::Index, codegen::temp_regs, TGSI_FILE_OUTPUT, and TGSI_FILE_TEMPORARY.

00185 {
00186    int reg;
00187 
00188    switch (dest->DstRegister.File) {
00189    case TGSI_FILE_TEMPORARY:
00190       reg = gen->temp_regs[dest->DstRegister.Index][channel];
00191       break;
00192    case TGSI_FILE_OUTPUT:
00193       reg = get_itemp(gen);
00194       break;
00195    default:
00196       assert(0);
00197    }
00198 
00199    return reg;
00200 }

static int get_itemp ( struct codegen * gen  )  [static]

Allocate an intermediate temporary register.

Definition at line 83 of file cell_gen_fp.c.

References assert, Elements, codegen::f, codegen::itemps, codegen::num_itemps, and spe_allocate_available_register().

00084 {
00085    int t = spe_allocate_available_register(gen->f);
00086    assert(gen->num_itemps < Elements(gen->itemps));
00087    gen->itemps[gen->num_itemps++] = t;
00088    return t;
00089 }

static int get_src_reg ( struct codegen * gen,
int  channel,
const struct tgsi_full_src_register * src 
) [static]

Return the index of the SPU temporary containing the named TGSI source register.

If the TGSI register is a TGSI_FILE_TEMPORARY we just return the corresponding SPE register. If the TGSI register is TGSI_FILE_INPUT/CONSTANT/IMMEDIATE we allocate a new SPE register and emit an SPE load instruction.

Definition at line 135 of file cell_gen_fp.c.

References assert, codegen::f, tgsi_src_register::File, get_itemp(), tgsi_src_register::Index, codegen::inputs_reg, offset(), spe_lqd(), tgsi_full_src_register::SrcRegister, codegen::temp_regs, TGSI_FILE_CONSTANT, TGSI_FILE_IMMEDIATE, TGSI_FILE_INPUT, and TGSI_FILE_TEMPORARY.

00138 {
00139    int reg;
00140 
00141    /* XXX need to examine src swizzle info here.
00142     * That will involve changing the channel var...
00143     */
00144 
00145 
00146    switch (src->SrcRegister.File) {
00147    case TGSI_FILE_TEMPORARY:
00148       reg = gen->temp_regs[src->SrcRegister.Index][channel];
00149       break;
00150    case TGSI_FILE_INPUT:
00151       {
00152          /* offset is measured in quadwords, not bytes */
00153          int offset = src->SrcRegister.Index * 4 + channel;
00154          reg = get_itemp(gen);
00155          /* Load:  reg = memory[(machine_reg) + offset] */
00156          spe_lqd(gen->f, reg, gen->inputs_reg, offset);
00157 #if DISASSEM
00158          printf("lqd\tr%d, r%d + %d\n", reg, gen->inputs_reg, offset);
00159 #endif
00160       }
00161       break;
00162    case TGSI_FILE_IMMEDIATE:
00163       /* xxx fall-through for now / fix */
00164    case TGSI_FILE_CONSTANT:
00165       /* xxx fall-through for now / fix */
00166    default:
00167       assert(0);
00168    }
00169 
00170    return reg;
00171 }

static void store_dest_reg ( struct codegen * gen,
int  value_reg,
int  channel,
const struct tgsi_full_dst_register * dest 
) [static]

When a TGSI instruction is writing to an output register, this function emits the SPE store instruction to store the value_reg.

Parameters:
value_reg the SPE register containing the value to store. This would have been returned by get_dst_reg().

Definition at line 210 of file cell_gen_fp.c.

References assert, tgsi_full_dst_register::DstRegister, codegen::f, tgsi_dst_register::File, tgsi_dst_register::Index, offset(), codegen::outputs_reg, spe_stqd(), TGSI_FILE_OUTPUT, and TGSI_FILE_TEMPORARY.

00213 {
00214    switch (dest->DstRegister.File) {
00215    case TGSI_FILE_TEMPORARY:
00216       /* no-op */
00217       break;
00218    case TGSI_FILE_OUTPUT:
00219       {
00220          /* offset is measured in quadwords, not bytes */
00221          int offset = dest->DstRegister.Index * 4 + channel;
00222          /* Store: memory[(machine_reg) + offset] = reg */
00223          spe_stqd(gen->f, value_reg, gen->outputs_reg, offset);
00224 #if DISASSEM
00225          printf("stqd\tr%d, r%d + %d\n", value_reg, gen->outputs_reg, offset);
00226 #endif
00227       }
00228       break;
00229    default:
00230       assert(0);
00231    }
00232 }


Generated on Tue Sep 29 06:25:30 2009 for Gallium3D by  doxygen 1.5.4