cell_gen_fp.c

Go to the documentation of this file.
00001 /**************************************************************************
00002  * 
00003  * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
00004  * All Rights Reserved.
00005  * 
00006  * Permission is hereby granted, free of charge, to any person obtaining a
00007  * copy of this software and associated documentation files (the
00008  * "Software"), to deal in the Software without restriction, including
00009  * without limitation the rights to use, copy, modify, merge, publish,
00010  * distribute, sub license, and/or sell copies of the Software, and to
00011  * permit persons to whom the Software is furnished to do so, subject to
00012  * the following conditions:
00013  * 
00014  * The above copyright notice and this permission notice (including the
00015  * next paragraph) shall be included in all copies or substantial portions
00016  * of the Software.
00017  * 
00018  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
00019  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
00020  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
00021  * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
00022  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
00023  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
00024  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
00025  * 
00026  **************************************************************************/
00027 
00028 
00029 
00041 #include "pipe/p_defines.h"
00042 #include "pipe/p_state.h"
00043 #include "pipe/p_shader_tokens.h"
00044 #include "tgsi/tgsi_parse.h"
00045 #include "tgsi/tgsi_util.h"
00046 #include "tgsi/tgsi_exec.h"
00047 #include "tgsi/tgsi_dump.h"
00048 #include "rtasm/rtasm_ppc_spe.h"
00049 #include "util/u_memory.h"
00050 #include "cell_context.h"
00051 #include "cell_gen_fp.h"
00052 
00053 
/** Set to non-zero to print each emitted SPE instruction (debug aid) */
#define DISASSEM 1
00056 
00057 
00061 struct codegen
00062 {
00063    int inputs_reg;      
00064    int outputs_reg;     
00065    int constants_reg;   
00066    int temp_regs[8][4]; 
00068    int one_reg;         
00071    int num_itemps;
00072    int itemps[3];
00073 
00074    struct spe_function *f;
00075    boolean error;
00076 };
00077 
00078 
00082 static int
00083 get_itemp(struct codegen *gen)
00084 {
00085    int t = spe_allocate_available_register(gen->f);
00086    assert(gen->num_itemps < Elements(gen->itemps));
00087    gen->itemps[gen->num_itemps++] = t;
00088    return t;
00089 }
00090 
00095 static void
00096 free_itemps(struct codegen *gen)
00097 {
00098    int i;
00099    for (i = 0; i < gen->num_itemps; i++) {
00100       spe_release_register(gen->f, gen->itemps[i]);
00101    }
00102    gen->num_itemps = 0;
00103 }
00104 
00105 
00110 static int
00111 get_const_one_reg(struct codegen *gen)
00112 {
00113    if (gen->one_reg <= 0) {
00114       gen->one_reg = spe_allocate_available_register(gen->f);
00115    }
00116 
00117    /* one = {1.0, 1.0, 1.0, 1.0} */
00118    spe_load_float(gen->f, gen->one_reg, 1.0f);
00119 #if DISASSEM
00120    printf("il\tr%d, 1.0f\n", gen->one_reg);
00121 #endif
00122 
00123    return gen->one_reg;
00124 }
00125 
00126 
00134 static int
00135 get_src_reg(struct codegen *gen,
00136             int channel,
00137             const struct tgsi_full_src_register *src)
00138 {
00139    int reg;
00140 
00141    /* XXX need to examine src swizzle info here.
00142     * That will involve changing the channel var...
00143     */
00144 
00145 
00146    switch (src->SrcRegister.File) {
00147    case TGSI_FILE_TEMPORARY:
00148       reg = gen->temp_regs[src->SrcRegister.Index][channel];
00149       break;
00150    case TGSI_FILE_INPUT:
00151       {
00152          /* offset is measured in quadwords, not bytes */
00153          int offset = src->SrcRegister.Index * 4 + channel;
00154          reg = get_itemp(gen);
00155          /* Load:  reg = memory[(machine_reg) + offset] */
00156          spe_lqd(gen->f, reg, gen->inputs_reg, offset);
00157 #if DISASSEM
00158          printf("lqd\tr%d, r%d + %d\n", reg, gen->inputs_reg, offset);
00159 #endif
00160       }
00161       break;
00162    case TGSI_FILE_IMMEDIATE:
00163       /* xxx fall-through for now / fix */
00164    case TGSI_FILE_CONSTANT:
00165       /* xxx fall-through for now / fix */
00166    default:
00167       assert(0);
00168    }
00169 
00170    return reg;
00171 }
00172 
00173 
00181 static int
00182 get_dst_reg(struct codegen *gen,
00183             int channel,
00184             const struct tgsi_full_dst_register *dest)
00185 {
00186    int reg;
00187 
00188    switch (dest->DstRegister.File) {
00189    case TGSI_FILE_TEMPORARY:
00190       reg = gen->temp_regs[dest->DstRegister.Index][channel];
00191       break;
00192    case TGSI_FILE_OUTPUT:
00193       reg = get_itemp(gen);
00194       break;
00195    default:
00196       assert(0);
00197    }
00198 
00199    return reg;
00200 }
00201 
00202 
00209 static void
00210 store_dest_reg(struct codegen *gen,
00211                int value_reg, int channel,
00212                const struct tgsi_full_dst_register *dest)
00213 {
00214    switch (dest->DstRegister.File) {
00215    case TGSI_FILE_TEMPORARY:
00216       /* no-op */
00217       break;
00218    case TGSI_FILE_OUTPUT:
00219       {
00220          /* offset is measured in quadwords, not bytes */
00221          int offset = dest->DstRegister.Index * 4 + channel;
00222          /* Store: memory[(machine_reg) + offset] = reg */
00223          spe_stqd(gen->f, value_reg, gen->outputs_reg, offset);
00224 #if DISASSEM
00225          printf("stqd\tr%d, r%d + %d\n", value_reg, gen->outputs_reg, offset);
00226 #endif
00227       }
00228       break;
00229    default:
00230       assert(0);
00231    }
00232 }
00233 
00234 
00235 static boolean
00236 emit_MOV(struct codegen *gen, const struct tgsi_full_instruction *inst)
00237 {
00238    int ch;
00239    for (ch = 0; ch < 4; ch++) {
00240       if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
00241          int src_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
00242          int dst_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
00243          /* XXX we don't always need to actually emit a mov instruction here */
00244          spe_move(gen->f, dst_reg, src_reg);
00245 #if DISASSEM
00246          printf("mov\tr%d, r%d\n", dst_reg, src_reg);
00247 #endif
00248          store_dest_reg(gen, dst_reg, ch, &inst->FullDstRegisters[0]);
00249          free_itemps(gen);
00250       }
00251    }
00252    return true;
00253 }
00254 
00255 
00261 static boolean
00262 emit_ADD(struct codegen *gen, const struct tgsi_full_instruction *inst)
00263 {
00264    int ch;
00265    /* Loop over Red/Green/Blue/Alpha channels */
00266    for (ch = 0; ch < 4; ch++) {
00267       /* If the dest R, G, B or A writemask is enabled... */
00268       if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
00269          /* get indexes of the two src, one dest SPE registers */
00270          int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
00271          int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
00272          int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
00273 
00274          /* Emit actual SPE instruction: d = s1 + s2 */
00275          spe_fa(gen->f, d_reg, s1_reg, s2_reg);
00276 #if DISASSEM
00277          printf("fa\tr%d, r%d, r%d\n", d_reg, s1_reg, s2_reg);
00278 #endif
00279 
00280          /* Store the result (a no-op for TGSI_FILE_TEMPORARY dests) */
00281          store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
00282          /* Free any intermediate temps we allocated */
00283          free_itemps(gen);
00284       }
00285    }
00286    return true;
00287 }
00288 
00289 
00293 static boolean
00294 emit_MUL(struct codegen *gen, const struct tgsi_full_instruction *inst)
00295 {
00296    int ch;
00297    for (ch = 0; ch < 4; ch++) {
00298       if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
00299          int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
00300          int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
00301          int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
00302          /* d = s1 * s2 */
00303          spe_fm(gen->f, d_reg, s1_reg, s2_reg);
00304 #if DISASSEM
00305          printf("fm\tr%d, r%d, r%d\n", d_reg, s1_reg, s2_reg);
00306 #endif
00307          store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
00308          free_itemps(gen);
00309       }
00310    }
00311    return true;
00312 }
00313 
00314 
00321 static boolean
00322 emit_SGT(struct codegen *gen, const struct tgsi_full_instruction *inst)
00323 {
00324    int ch;
00325 
00326    for (ch = 0; ch < 4; ch++) {
00327       if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
00328          int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
00329          int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
00330          int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
00331 
00332          /* d = (s1 > s2) */
00333          spe_fcgt(gen->f, d_reg, s1_reg, s2_reg);
00334 #if DISASSEM
00335          printf("fcgt\tr%d, r%d, r%d\n", d_reg, s1_reg, s2_reg);
00336 #endif
00337 
00338          /* convert d from 0x0/0xffffffff to 0.0/1.0 */
00339          /* d = d & one_reg */
00340          spe_and(gen->f, d_reg, d_reg, get_const_one_reg(gen));
00341 #if DISASSEM
00342          printf("and\tr%d, r%d, r%d\n", d_reg, d_reg, get_const_one_reg(gen));
00343 #endif
00344 
00345          store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
00346          free_itemps(gen);
00347       }
00348    }
00349 
00350    return true;
00351 }
00352 
00353 
00361 static boolean
00362 emit_END(struct codegen *gen)
00363 {
00364    /* return from function call */
00365    spe_bi(gen->f, SPE_REG_RA, 0, 0);
00366 #if DISASSEM
00367    printf("bi\trRA\n");
00368 #endif
00369    return true;
00370 }
00371 
00372 
00376 static boolean
00377 emit_instruction(struct codegen *gen,
00378                  const struct tgsi_full_instruction *inst)
00379 {
00380    switch (inst->Instruction.Opcode) {
00381    case TGSI_OPCODE_MOV:
00382       return emit_MOV(gen, inst);
00383    case TGSI_OPCODE_MUL:
00384       return emit_MUL(gen, inst);
00385    case TGSI_OPCODE_ADD:
00386       return emit_ADD(gen, inst);
00387    case TGSI_OPCODE_SGT:
00388       return emit_SGT(gen, inst);
00389    case TGSI_OPCODE_END:
00390       return emit_END(gen);
00391 
00392    /* XXX lots more cases to do... */
00393 
00394    default:
00395       return false;
00396    }
00397 
00398    return true;
00399 }
00400 
00401 
00402 
00408 static void
00409 emit_declaration(struct codegen *gen, const struct tgsi_full_declaration *decl)
00410 {
00411    int i, ch;
00412 
00413    switch (decl->Declaration.File) {
00414    case TGSI_FILE_TEMPORARY:
00415 #if DISASSEM
00416       printf("Declare temp reg %d .. %d\n",
00417              decl->DeclarationRange.First,
00418              decl->DeclarationRange.Last);
00419 #endif
00420       for (i = decl->DeclarationRange.First;
00421            i <= decl->DeclarationRange.Last;
00422            i++) {
00423          for (ch = 0; ch < 4; ch++) {
00424             gen->temp_regs[i][ch] = spe_allocate_available_register(gen->f);
00425          }
00426 
00427          /* XXX if we run out of SPE registers, we need to spill
00428           * to SPU memory.  someday...
00429           */
00430 
00431 #if DISASSEM
00432          printf("  SPE regs: %d %d %d %d\n",
00433                 gen->temp_regs[i][0],
00434                 gen->temp_regs[i][1],
00435                 gen->temp_regs[i][2],
00436                 gen->temp_regs[i][3]);
00437 #endif
00438       }
00439       break;
00440    default:
00441       ; /* ignore */
00442    }
00443 }
00444 
00445 
00454 boolean
00455 cell_gen_fragment_program(struct cell_context *cell,
00456                           const struct tgsi_token *tokens,
00457                           struct spe_function *f)
00458 {
00459    struct tgsi_parse_context parse;
00460    struct codegen gen;
00461 
00462    memset(&gen, 0, sizeof(gen));
00463    gen.f = f;
00464 
00465    /* For SPE function calls: reg $3 = first param, $4 = second param, etc. */
00466    gen.inputs_reg = 3;     /* pointer to inputs array */
00467    gen.outputs_reg = 4;    /* pointer to outputs array */
00468    gen.constants_reg = 5;  /* pointer to constants array */
00469 
00470    spe_init_func(f, SPU_MAX_FRAGMENT_PROGRAM_INSTS * SPE_INST_SIZE);
00471    spe_allocate_register(f, gen.inputs_reg);
00472    spe_allocate_register(f, gen.outputs_reg);
00473    spe_allocate_register(f, gen.constants_reg);
00474 
00475 #if DISASSEM
00476    printf("Begin %s\n", __FUNCTION__);
00477    tgsi_dump(tokens, 0);
00478 #endif
00479 
00480    tgsi_parse_init(&parse, tokens);
00481 
00482    while (!tgsi_parse_end_of_tokens(&parse) && !gen.error) {
00483       tgsi_parse_token(&parse);
00484 
00485       switch (parse.FullToken.Token.Type) {
00486       case TGSI_TOKEN_TYPE_IMMEDIATE:
00487 #if 0
00488          if (!note_immediate(&gen, &parse.FullToken.FullImmediate ))
00489             goto fail;
00490 #endif
00491          break;
00492 
00493       case TGSI_TOKEN_TYPE_DECLARATION:
00494          emit_declaration(&gen, &parse.FullToken.FullDeclaration);
00495          break;
00496 
00497       case TGSI_TOKEN_TYPE_INSTRUCTION:
00498          if (!emit_instruction(&gen, &parse.FullToken.FullInstruction )) {
00499             gen.error = true;
00500          }
00501          break;
00502 
00503       default:
00504          assert(0);
00505 
00506       }
00507    }
00508 
00509 
00510    if (gen.error) {
00511       /* terminate the SPE code */
00512       return emit_END(&gen);
00513    }
00514 
00515 #if DISASSEM
00516    printf("cell_gen_fragment_program nr instructions: %d\n", f->num_inst);
00517    printf("End %s\n", __FUNCTION__);
00518 #endif
00519 
00520    tgsi_parse_free( &parse );
00521 
00522    return !gen.error;
00523 }

Generated on Tue Sep 29 06:25:15 2009 for Gallium3D by  doxygen 1.5.4