cell_gen_fragment.c File Reference

Include dependency graph for cell_gen_fragment.c:

Go to the source code of this file.

Defines

#define OPTIMIZATIONS   1
 Generate SPU per-fragment code (actually per-quad code).

Functions

static void gen_depth_test (const struct pipe_depth_stencil_alpha_state *dsa, struct spe_function *f, int mask_reg, int ifragZ_reg, int ifbZ_reg, int zmask_reg)
 Generate SPE code to perform Z/depth testing.
static void gen_alpha_test (const struct pipe_depth_stencil_alpha_state *dsa, struct spe_function *f, int mask_reg, int fragA_reg)
 Generate SPE code to perform alpha testing.
static void gen_blend (const struct pipe_blend_state *blend, struct spe_function *f, enum pipe_format color_format, int fragR_reg, int fragG_reg, int fragB_reg, int fragA_reg, int fbRGBA_reg)
 Generate SPE code to implement the given blend mode for a quad of pixels.
static void gen_logicop (const struct pipe_blend_state *blend, struct spe_function *f, int fragRGBA_reg, int fbRGBA_reg)
static void gen_colormask (uint colormask, struct spe_function *f, int fragRGBA_reg, int fbRGBA_reg)
static void gen_pack_colors (struct spe_function *f, enum pipe_format color_format, int r_reg, int g_reg, int b_reg, int a_reg, int rgba_reg)
 Generate code to pack a quad of float colors into four 32-bit integers.
void cell_gen_fragment_function (struct cell_context *cell, struct spe_function *f)
 Generate SPE code to implement the fragment operations (alpha test, depth test, stencil test, blending, colormask, and final framebuffer write) as specified by the current context state.


Define Documentation

#define OPTIMIZATIONS   1

Generate SPU per-fragment code (actually per-quad code).

Author:
Brian Paul

Do extra optimizations?

Definition at line 45 of file cell_gen_fragment.c.


Function Documentation

void cell_gen_fragment_function ( struct cell_context *cell,
struct spe_function *f 
)

Generate SPE code to implement the fragment operations (alpha test, depth test, stencil test, blending, colormask, and final framebuffer write) as specified by the current context state.

Logically, this code will be called after running the fragment shader. But under some circumstances we could run some of this code before the fragment shader to cull fragments/quads that are totally occluded/discarded.

XXX we only support PIPE_FORMAT_Z24S8_UNORM z/stencil buffer right now.

See the spu_default_fragment_ops() function to see how the per-fragment operations would be done with ordinary C code. The code we generate here though has no branches, is SIMD, etc and should be much faster.

Parameters:
cell the rendering context (in)
f the generated function (out)

fbRGBA_reg: framebuffer's RGBA colors for quad

fbZS_reg: framebuffer's combined z/stencil values for quad

Definition at line 627 of file cell_gen_fragment.c.

References pipe_depth_stencil_alpha_state::alpha, ASSERT, cell_blend_state::base, cell_depth_stencil_alpha_state::base, cell_context::blend, pipe_blend_state::blend_enable, pipe_framebuffer_state::cbufs, pipe_blend_state::colormask, pipe_depth_stencil_alpha_state::depth, cell_context::depth_stencil, pipe_stencil_state::enabled, pipe_depth_state::enabled, pipe_alpha_state::enabled, pipe_surface::format, cell_context::framebuffer, gen_alpha_test(), gen_blend(), gen_colormask(), gen_depth_test(), gen_logicop(), gen_pack_colors(), pipe_blend_state::logicop_enable, PIPE_FORMAT_S8_UNORM, PIPE_FORMAT_S8Z24_UNORM, PIPE_FORMAT_X8Z24_UNORM, PIPE_FORMAT_Z16_UNORM, PIPE_FORMAT_Z24S8_UNORM, PIPE_FORMAT_Z24X8_UNORM, spe_a(), spe_allocate_available_register(), spe_allocate_register(), spe_and(), spe_bi(), spe_cfltu(), spe_fm(), spe_fsmbi(), spe_init_func(), SPE_INST_SIZE, spe_load_float(), spe_lqx(), spe_or(), SPE_REG_RA, spe_release_register(), spe_rotmi(), spe_selb(), spe_shli(), spe_stqx(), SPU_MAX_FRAGMENT_OPS_INSTS, pipe_depth_stencil_alpha_state::stencil, TILE_SIZE, pipe_stencil_state::write_mask, pipe_depth_state::writemask, and pipe_framebuffer_state::zsbuf.

00628 {
         /* Emits, in this order: the quad-offset computation, the optional
          * alpha test, the optional depth/stencil handling, the framebuffer
          * color load, the optional blend, color packing plus logicop and
          * colormask, the masked color store, and the function return.
          */
00629    const struct pipe_depth_stencil_alpha_state *dsa =
00630       &cell->depth_stencil->base;
00631    const struct pipe_blend_state *blend = &cell->blend->base;
00632    const enum pipe_format color_format = cell->framebuffer.cbufs[0]->format;
00633 
00634    /* For SPE function calls: reg $3 = first param, $4 = second param, etc. */
00635    const int x_reg = 3;  /* uint */
00636    const int y_reg = 4;  /* uint */
00637    const int color_tile_reg = 5;  /* tile_t * */
00638    const int depth_tile_reg = 6;  /* tile_t * */
00639    const int fragZ_reg = 7;   /* vector float */
00640    const int fragR_reg = 8;   /* vector float */
00641    const int fragG_reg = 9;   /* vector float */
00642    const int fragB_reg = 10;  /* vector float */
00643    const int fragA_reg = 11;  /* vector float */
00644    const int mask_reg = 12;   /* vector uint */
00645 
00646    /* offset of quad from start of tile
00647     * XXX assuming 4-byte pixels for color AND Z/stencil!!!!
00648     */
00649    int quad_offset_reg;
00650 
00651    int fbRGBA_reg;  /* framebuffer's RGBA colors for quad */
00652    int fbZS_reg;    /* framebuffer's combined z/stencil values for quad */
00654    spe_init_func(f, SPU_MAX_FRAGMENT_OPS_INSTS * SPE_INST_SIZE);
         /* Reserve the registers that carry the incoming parameters so the
          * allocator below does not hand them out as temporaries.
          */
00655    spe_allocate_register(f, x_reg);
00656    spe_allocate_register(f, y_reg);
00657    spe_allocate_register(f, color_tile_reg);
00658    spe_allocate_register(f, depth_tile_reg);
00659    spe_allocate_register(f, fragZ_reg);
00660    spe_allocate_register(f, fragR_reg);
00661    spe_allocate_register(f, fragG_reg);
00662    spe_allocate_register(f, fragB_reg);
00663    spe_allocate_register(f, fragA_reg);
00664    spe_allocate_register(f, mask_reg);
00665 
00666    quad_offset_reg = spe_allocate_available_register(f);
00667    fbRGBA_reg = spe_allocate_available_register(f);
00668    fbZS_reg = spe_allocate_available_register(f);
00669 
00670    /* compute offset of quad from start of tile, in bytes */
00671    {
00672       int x2_reg = spe_allocate_available_register(f);
00673       int y2_reg = spe_allocate_available_register(f);
00674 
            /* The constants below (16 quads per row) depend on this size. */
00675       ASSERT(TILE_SIZE == 32);
00676 
00677       spe_rotmi(f, x2_reg, x_reg, -1);  /* x2 = x / 2 */
00678       spe_rotmi(f, y2_reg, y_reg, -1);  /* y2 = y / 2 */
00679       spe_shli(f, y2_reg, y2_reg, 4);   /* y2 *= 16 */
00680       spe_a(f, quad_offset_reg, y2_reg, x2_reg);  /* offset = y2 + x2 */
00681       spe_shli(f, quad_offset_reg, quad_offset_reg, 4);   /* offset *= 16 */
00682 
00683       spe_release_register(f, x2_reg);
00684       spe_release_register(f, y2_reg);
00685    }
00686 
00687 
00688    if (dsa->alpha.enabled) {
00689       gen_alpha_test(dsa, f, mask_reg, fragA_reg);
00690    }
00691 
00692    if (dsa->depth.enabled || dsa->stencil[0].enabled) {
00693       const enum pipe_format zs_format = cell->framebuffer.zsbuf->format;
00694       boolean write_depth_stencil;
00695 
00696       int fbZ_reg = spe_allocate_available_register(f); /* Z values */
00697       int fbS_reg = spe_allocate_available_register(f); /* Stencil values */
00698 
00699       /* fetch quad of depth/stencil values from tile at (x,y) */
00700       /* Load: fbZS_reg = memory[depth_tile_reg + quad_offset_reg] */
00701       spe_lqx(f, fbZS_reg, depth_tile_reg, quad_offset_reg);
00702 
00703       if (dsa->depth.enabled) {
00704          /* Extract Z bits from fbZS_reg into fbZ_reg */
00705          if (zs_format == PIPE_FORMAT_S8Z24_UNORM ||
00706              zs_format == PIPE_FORMAT_X8Z24_UNORM) {
                  /* Named zbits_reg so it does not shadow the function-level
                   * mask_reg (the quad/pixel "alive" mask).
                   */
00707             int zbits_reg = spe_allocate_available_register(f);
00708             spe_fsmbi(f, zbits_reg, 0x7777);  /* zbits[0,1,2,3] = 0x00ffffff */
00709             spe_and(f, fbZ_reg, fbZS_reg, zbits_reg);  /* fbZ = fbZS & zbits */
00710             spe_release_register(f, zbits_reg);
00711             /* OK, fbZ_reg has four 24-bit Z values now */
00712          }
00713          else {
00714             /* XXX handle other z/stencil formats */
00715             ASSERT(0);
00716          }
00717 
00718          /* Convert fragZ values from float[4] to uint[4] */
00719          if (zs_format == PIPE_FORMAT_S8Z24_UNORM ||
00720              zs_format == PIPE_FORMAT_X8Z24_UNORM ||
00721              zs_format == PIPE_FORMAT_Z24S8_UNORM ||
00722              zs_format == PIPE_FORMAT_Z24X8_UNORM) {
00723             /* 24-bit Z values */
00724             int scale_reg = spe_allocate_available_register(f);
00725 
00726             /* scale_reg[0,1,2,3] = float(2^24-1) */
00727             spe_load_float(f, scale_reg, (float) 0xffffff);
00728 
00729             /* XXX these two instructions might be combined */
00730             spe_fm(f, fragZ_reg, fragZ_reg, scale_reg); /* fragZ *= scale */
00731             spe_cfltu(f, fragZ_reg, fragZ_reg, 0);  /* fragZ = (int) fragZ */
00732 
00733             spe_release_register(f, scale_reg);
00734          }
00735          else {
00736             /* XXX handle 16-bit Z format */
00737             ASSERT(0);
00738          }
00739       }
00740 
00741       if (dsa->stencil[0].enabled) {
00742          /* Extract Stencil bits from fbZS_reg into fbS_reg */
00743          if (zs_format == PIPE_FORMAT_S8Z24_UNORM ||
00744              zs_format == PIPE_FORMAT_X8Z24_UNORM) {
00745             /* XXX extract with a shift */
00746             ASSERT(0);
00747          }
00748          else if (zs_format == PIPE_FORMAT_Z24S8_UNORM ||
00749                   zs_format == PIPE_FORMAT_Z24X8_UNORM) {
00750             /* XXX extract with a mask */
00751             ASSERT(0);
00752          }
00753       }
00754 
00755 
00756       if (dsa->stencil[0].enabled) {
00757          /* XXX this may involve depth testing too */
00758          // gen_stencil_test(dsa, f, ... );
00759          ASSERT(0);
00760       }
00761       else if (dsa->depth.enabled) {
00762          int zmask_reg = spe_allocate_available_register(f);
00763          gen_depth_test(dsa, f, mask_reg, fragZ_reg, fbZ_reg, zmask_reg);
00764          spe_release_register(f, zmask_reg);
00765       }
00766 
00767       /* do we need to write Z and/or Stencil back into framebuffer? */
            /* Reduce the OR of the masks to an explicit true/false value. */
00768       write_depth_stencil = (dsa->depth.writemask |
00769                              dsa->stencil[0].write_mask |
00770                              dsa->stencil[1].write_mask) != 0;
00771 
00772       if (write_depth_stencil) {
00773          /* Merge latest Z and Stencil values into fbZS_reg.
00774           * fbZ_reg has four Z vals in bits [23..0] or bits [15..0].
00775           * fbS_reg has four 8-bit stencil values in bits [7..0].
                *
                * NOTE(review): stencil extraction above is not implemented, so
                * when only depth is enabled nothing in this function has
                * written fbS_reg before it is merged here; for S8Z24 this
                * looks like it can overwrite the stored stencil bits --
                * confirm and preserve them once stencil extraction exists.
00776           */
00777          if (zs_format == PIPE_FORMAT_S8Z24_UNORM ||
00778              zs_format == PIPE_FORMAT_X8Z24_UNORM) {
00779             spe_shli(f, fbS_reg, fbS_reg, 24); /* fbS = fbS << 24 */
00780             spe_or(f, fbZS_reg, fbS_reg, fbZ_reg); /* fbZS = fbS | fbZ */
00781          }
00782          else if (zs_format == PIPE_FORMAT_Z24S8_UNORM ||
00783                   zs_format == PIPE_FORMAT_Z24X8_UNORM) {
00784             /* XXX to do */
00785             ASSERT(0);
00786          }
00787          else if (zs_format == PIPE_FORMAT_Z16_UNORM) {
00788             /* XXX to do */
00789             ASSERT(0);
00790          }
00791          else if (zs_format == PIPE_FORMAT_S8_UNORM) {
00792             /* XXX to do */
00793             ASSERT(0);
00794          }
00795          else {
00796             /* bad zs_format */
00797             ASSERT(0);
00798          }
00799 
00800          /* Store: memory[depth_tile_reg + quad_offset_reg] = fbZS */
00801          spe_stqx(f, fbZS_reg, depth_tile_reg, quad_offset_reg);
00802       }
00803 
00804       spe_release_register(f, fbZ_reg);
00805       spe_release_register(f, fbS_reg);
00806    }
00807 
00808 
00809    /* Get framebuffer quad/colors.  We'll need these for blending,
00810     * color masking, and to obey the quad/pixel mask.
00811     * Load: fbRGBA_reg = memory[color_tile + quad_offset]
00812     * Note: if mask={~0,~0,~0,~0} and we're not blending or colormasking
00813     * we could skip this load.
00814     */
00815    spe_lqx(f, fbRGBA_reg, color_tile_reg, quad_offset_reg);
00816 
00817 
00818    if (blend->blend_enable) {
00819       gen_blend(blend, f, color_format,
00820                 fragR_reg, fragG_reg, fragB_reg, fragA_reg, fbRGBA_reg);
00821    }
00822 
00823    /*
00824     * Write fragment colors to framebuffer/tile.
00825     * This involves converting the fragment colors from float[4] to the
00826     * tile's specific format and obeying the quad/pixel mask.
00827     */
00828    {
00829       int rgba_reg = spe_allocate_available_register(f);
00830 
00831       /* Pack four float colors as four 32-bit int colors */
00832       gen_pack_colors(f, color_format,
00833                       fragR_reg, fragG_reg, fragB_reg, fragA_reg,
00834                       rgba_reg);
00835 
00836       if (blend->logicop_enable) {
00837          gen_logicop(blend, f, rgba_reg, fbRGBA_reg);
00838       }
00839 
00840       if (blend->colormask != 0xf) {
00841          gen_colormask(blend->colormask, f, rgba_reg, fbRGBA_reg);
00842       }
00843 
00844 
00845       /* Mix fragment colors with framebuffer colors using the quad/pixel mask:
00846        * if (mask[i])
00847        *    rgba[i] = rgba[i];
00848        * else
00849        *    rgba[i] = framebuffer[i];
00850        */
00851       spe_selb(f, rgba_reg, fbRGBA_reg, rgba_reg, mask_reg);
00852 
00853       /* Store updated quad in tile:
00854        * memory[color_tile + quad_offset] = rgba_reg;
00855        */
00856       spe_stqx(f, rgba_reg, color_tile_reg, quad_offset_reg);
00857 
00858       spe_release_register(f, rgba_reg);
00859    }
00860 
00861    //printf("gen_fragment_ops nr instructions: %u\n", f->num_inst);
00862 
00863    spe_bi(f, SPE_REG_RA, 0, 0);  /* return from function call */
00864 
00865 
00866    spe_release_register(f, fbRGBA_reg);
00867    spe_release_register(f, fbZS_reg);
00868    spe_release_register(f, quad_offset_reg);
00869 }

static void gen_alpha_test ( const struct pipe_depth_stencil_alpha_state *dsa,
struct spe_function *f,
int  mask_reg,
int  fragA_reg 
) [static]

Generate SPE code to perform alpha testing.

Parameters:
dsa Gallium depth/stencil/alpha state to gen code for
f SPE function to append instruction onto.
mask_reg register containing quad/pixel "alive" mask (in/out)
fragA_reg register containing four fragment alpha values (in)

Definition at line 145 of file cell_gen_fragment.c.

References pipe_depth_stencil_alpha_state::alpha, ASSERT, pipe_alpha_state::enabled, pipe_alpha_state::func, PIPE_FUNC_ALWAYS, PIPE_FUNC_EQUAL, PIPE_FUNC_GEQUAL, PIPE_FUNC_GREATER, PIPE_FUNC_LEQUAL, PIPE_FUNC_LESS, PIPE_FUNC_NEVER, PIPE_FUNC_NOTEQUAL, pipe_alpha_state::ref, spe_allocate_available_register(), spe_and(), spe_andc(), spe_biz(), spe_fceq(), spe_fcgt(), spe_il(), spe_load_float(), spe_orx(), SPE_REG_RA, and spe_release_register().

00147 {
         /* Two temporaries: ref_reg holds the reference alpha value and
          * amask_reg receives the result of the per-pixel comparison.
          */
00148    int ref_reg = spe_allocate_available_register(f);
00149    int amask_reg = spe_allocate_available_register(f);
00150 
00151    ASSERT(dsa->alpha.enabled);
00152 
         /* NEVER and ALWAYS do not compare against the reference value, so
          * the load is skipped for them.
          */
00153    if ((dsa->alpha.func != PIPE_FUNC_NEVER) &&
00154        (dsa->alpha.func != PIPE_FUNC_ALWAYS)) {
00155       /* load/splat the alpha reference float value */
00156       spe_load_float(f, ref_reg, dsa->alpha.ref);
00157    }
00158 
00159    /* emit code to do the alpha comparison, updating 'mask'.
          * Only "equal" and "greater than" compares are emitted; the other
          * functions are built by swapping the operands and/or by AND-ing
          * the mask with the complement of the compare result.
          */
00160    switch (dsa->alpha.func) {
00161    case PIPE_FUNC_EQUAL:
00162       /* amask = (fragA == ref) */
00163       spe_fceq(f, amask_reg, fragA_reg, ref_reg);
00164       /* mask = (mask & amask) */
00165       spe_and(f, mask_reg, mask_reg, amask_reg);
00166       break;
00167 
00168    case PIPE_FUNC_NOTEQUAL:
00169       /* amask = (fragA == ref) */
00170       spe_fceq(f, amask_reg, fragA_reg, ref_reg);
00171       /* mask = (mask & ~amask) */
00172       spe_andc(f, mask_reg, mask_reg, amask_reg);
00173       break;
00174 
00175    case PIPE_FUNC_GREATER:
00176       /* amask = (fragA > ref) */
00177       spe_fcgt(f, amask_reg, fragA_reg, ref_reg);
00178       /* mask = (mask & amask) */
00179       spe_and(f, mask_reg, mask_reg, amask_reg);
00180       break;
00181 
00182    case PIPE_FUNC_LESS:
00183       /* amask = (ref > fragA) */
00184       spe_fcgt(f, amask_reg, ref_reg, fragA_reg);
00185       /* mask = (mask & amask) */
00186       spe_and(f, mask_reg, mask_reg, amask_reg);
00187       break;
00188 
00189    case PIPE_FUNC_LEQUAL:
00190       /* amask = (fragA > ref); "less or equal" is its complement */
00191       spe_fcgt(f, amask_reg, fragA_reg, ref_reg);
00192       /* mask = (mask & ~amask) */
00193       spe_andc(f, mask_reg, mask_reg, amask_reg);
00194       break;
00195 
00196    case PIPE_FUNC_GEQUAL:
00197       /* amask = (ref > fragA); "greater or equal" is its complement */
00198       spe_fcgt(f, amask_reg, ref_reg, fragA_reg);
00199       /* mask = (mask & ~amask) */
00200       spe_andc(f, mask_reg, mask_reg, amask_reg);
00201       break;
00202 
00203    case PIPE_FUNC_NEVER:
00204       spe_il(f, mask_reg, 0);  /* mask = [0,0,0,0] */
00205       break;
00206 
00207    case PIPE_FUNC_ALWAYS:
00208       /* no-op, mask unchanged */
00209       break;
00210 
00211    default:
00212       ASSERT(0);
00213       break;
00214    }
00215 
00216 #if OPTIMIZATIONS
00217    /* if mask == {0,0,0,0} we're all done, return */
00218    {
00219       /* re-use amask reg here */
00220       int tmp_reg = amask_reg;
00221       /* tmp[0] = (mask[0] | mask[1] | mask[2] | mask[3]) */
00222       spe_orx(f, tmp_reg, mask_reg);
00223       /* if tmp[0] == 0 then return from function call */
00224       spe_biz(f, tmp_reg, SPE_REG_RA, 0, 0);
00225    }
00226 #endif
00227 
00228    spe_release_register(f, ref_reg);
00229    spe_release_register(f, amask_reg);
00230 }

static void gen_blend ( const struct pipe_blend_state *blend,
struct spe_function *f,
enum pipe_format  color_format,
int  fragR_reg,
int  fragG_reg,
int  fragB_reg,
int  fragA_reg,
int  fbRGBA_reg 
) [static]

Generate SPE code to implement the given blend mode for a quad of pixels.

Parameters:
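blend Gallium blend state to generate code for (in)
f SPE function to append instruction onto.
color_format format of the packed framebuffer colors in fbRGBA_reg (in)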
fragR_reg register with fragment red values (float) (in/out)
fragG_reg register with fragment green values (float) (in/out)
fragB_reg register with fragment blue values (float) (in/out)
fragA_reg register with fragment alpha values (float) (in/out)
fbRGBA_reg register with packed framebuffer colors (integer) (in)

Definition at line 244 of file cell_gen_fragment.c.

References pipe_blend_state::alpha_dst_factor, pipe_blend_state::alpha_func, pipe_blend_state::alpha_src_factor, ASSERT, pipe_blend_state::blend_enable, codegen::one_reg, PIPE_BLEND_ADD, PIPE_BLEND_SUBTRACT, PIPE_BLENDFACTOR_INV_SRC_ALPHA, PIPE_BLENDFACTOR_ONE, PIPE_BLENDFACTOR_SRC_ALPHA, PIPE_BLENDFACTOR_SRC_COLOR, PIPE_BLENDFACTOR_ZERO, PIPE_FORMAT_A8R8G8B8_UNORM, PIPE_FORMAT_B8G8R8A8_UNORM, pipe_blend_state::rgb_dst_factor, pipe_blend_state::rgb_func, pipe_blend_state::rgb_src_factor, spe_allocate_available_register(), spe_and(), spe_cuflt(), spe_fa(), spe_fm(), spe_fs(), spe_load_float(), spe_load_int(), spe_move(), spe_release_register(), spe_roti(), and spe_zero().

00249 {
00250    int term1R_reg = spe_allocate_available_register(f);
00251    int term1G_reg = spe_allocate_available_register(f);
00252    int term1B_reg = spe_allocate_available_register(f);
00253    int term1A_reg = spe_allocate_available_register(f);
00254 
00255    int term2R_reg = spe_allocate_available_register(f);
00256    int term2G_reg = spe_allocate_available_register(f);
00257    int term2B_reg = spe_allocate_available_register(f);
00258    int term2A_reg = spe_allocate_available_register(f);
00259 
00260    int fbR_reg = spe_allocate_available_register(f);
00261    int fbG_reg = spe_allocate_available_register(f);
00262    int fbB_reg = spe_allocate_available_register(f);
00263    int fbA_reg = spe_allocate_available_register(f);
00264 
00265    int one_reg = spe_allocate_available_register(f);
00266    int tmp_reg = spe_allocate_available_register(f);
00267 
00268    boolean one_reg_set = false; /* avoid setting one_reg more than once */
00269 
00270    ASSERT(blend->blend_enable);
00271 
00272    /* Unpack/convert framebuffer colors from four 32-bit packed colors
00273     * (fbRGBA) to four float RGBA vectors (fbR, fbG, fbB, fbA).
00274     * Each 8-bit color component is expanded into a float in [0.0, 1.0].
00275     */
00276    {
00277       int mask_reg = spe_allocate_available_register(f);
00278 
00279       /* mask = {0x000000ff, 0x000000ff, 0x000000ff, 0x000000ff} */
00280       spe_load_int(f, mask_reg, 0xff);
00281 
00282       /* XXX there may be more clever ways to implement the following code */
00283       switch (color_format) {
00284       case PIPE_FORMAT_A8R8G8B8_UNORM:
00285          /* fbB = fbB & mask */
00286          spe_and(f, fbB_reg, fbRGBA_reg, mask_reg);
00287          /* mask = mask << 8 */
00288          spe_roti(f, mask_reg, mask_reg, 8);
00289 
00290          /* fbG = fbRGBA & mask */
00291          spe_and(f, fbG_reg, fbRGBA_reg, mask_reg);
00292          /* fbG = fbG >> 8 */
00293          spe_roti(f, fbG_reg, fbG_reg, -8);
00294          /* mask = mask << 8 */
00295          spe_roti(f, mask_reg, mask_reg, 8);
00296 
00297          /* fbR = fbRGBA & mask */
00298          spe_and(f, fbR_reg, fbRGBA_reg, mask_reg);
00299          /* fbR = fbR >> 16 */
00300          spe_roti(f, fbR_reg, fbR_reg, -16);
00301          /* mask = mask << 8 */
00302          spe_roti(f, mask_reg, mask_reg, 8);
00303 
00304          /* fbA = fbRGBA & mask */
00305          spe_and(f, fbA_reg, fbRGBA_reg, mask_reg);
00306          /* fbA = fbA >> 24 */
00307          spe_roti(f, fbA_reg, fbA_reg, -24);
00308          break;
00309 
00310       case PIPE_FORMAT_B8G8R8A8_UNORM:
00311          /* fbA = fbA & mask */
00312          spe_and(f, fbA_reg, fbRGBA_reg, mask_reg);
00313          /* mask = mask << 8 */
00314          spe_roti(f, mask_reg, mask_reg, 8);
00315 
00316          /* fbR = fbRGBA & mask */
00317          spe_and(f, fbR_reg, fbRGBA_reg, mask_reg);
00318          /* fbR = fbR >> 8 */
00319          spe_roti(f, fbR_reg, fbR_reg, -8);
00320          /* mask = mask << 8 */
00321          spe_roti(f, mask_reg, mask_reg, 8);
00322 
00323          /* fbG = fbRGBA & mask */
00324          spe_and(f, fbG_reg, fbRGBA_reg, mask_reg);
00325          /* fbG = fbG >> 16 */
00326          spe_roti(f, fbG_reg, fbG_reg, -16);
00327          /* mask = mask << 8 */
00328          spe_roti(f, mask_reg, mask_reg, 8);
00329 
00330          /* fbB = fbRGBA & mask */
00331          spe_and(f, fbB_reg, fbRGBA_reg, mask_reg);
00332          /* fbB = fbB >> 24 */
00333          spe_roti(f, fbB_reg, fbB_reg, -24);
00334          break;
00335 
00336       default:
00337          ASSERT(0);
00338       }
00339 
00340       /* convert int[4] in [0,255] to float[4] in [0.0, 1.0] */
00341       spe_cuflt(f, fbR_reg, fbR_reg, 8);
00342       spe_cuflt(f, fbG_reg, fbG_reg, 8);
00343       spe_cuflt(f, fbB_reg, fbB_reg, 8);
00344       spe_cuflt(f, fbA_reg, fbA_reg, 8);
00345 
00346       spe_release_register(f, mask_reg);
00347    }
00348 
00349 
00350    /*
00351     * Compute Src RGB terms
00352     */
00353    switch (blend->rgb_src_factor) {
00354    case PIPE_BLENDFACTOR_ONE:
00355       spe_move(f, term1R_reg, fragR_reg);
00356       spe_move(f, term1G_reg, fragG_reg);
00357       spe_move(f, term1B_reg, fragB_reg);
00358       break;
00359    case PIPE_BLENDFACTOR_ZERO:
00360       spe_zero(f, term1R_reg);
00361       spe_zero(f, term1G_reg);
00362       spe_zero(f, term1B_reg);
00363       break;
00364    case PIPE_BLENDFACTOR_SRC_COLOR:
00365       spe_fm(f, term1R_reg, fragR_reg, fragR_reg);
00366       spe_fm(f, term1G_reg, fragG_reg, fragG_reg);
00367       spe_fm(f, term1B_reg, fragB_reg, fragB_reg);
00368       break;
00369    case PIPE_BLENDFACTOR_SRC_ALPHA:
00370       spe_fm(f, term1R_reg, fragR_reg, fragA_reg);
00371       spe_fm(f, term1G_reg, fragG_reg, fragA_reg);
00372       spe_fm(f, term1B_reg, fragB_reg, fragA_reg);
00373       break;
00374       /* XXX more cases */
00375    default:
00376       ASSERT(0);
00377    }
00378 
00379    /*
00380     * Compute Src Alpha term
00381     */
00382    switch (blend->alpha_src_factor) {
00383    case PIPE_BLENDFACTOR_ONE:
00384       spe_move(f, term1A_reg, fragA_reg);
00385       break;
00386    case PIPE_BLENDFACTOR_SRC_COLOR:
00387       spe_fm(f, term1A_reg, fragA_reg, fragA_reg);
00388       break;
00389    case PIPE_BLENDFACTOR_SRC_ALPHA:
00390       spe_fm(f, term1A_reg, fragA_reg, fragA_reg);
00391       break;
00392       /* XXX more cases */
00393    default:
00394       ASSERT(0);
00395    }
00396 
00397    /*
00398     * Compute Dest RGB terms
00399     */
00400    switch (blend->rgb_dst_factor) {
00401    case PIPE_BLENDFACTOR_ONE:
00402       spe_move(f, term2R_reg, fbR_reg);
00403       spe_move(f, term2G_reg, fbG_reg);
00404       spe_move(f, term2B_reg, fbB_reg);
00405       break;
00406    case PIPE_BLENDFACTOR_ZERO:
00407       spe_zero(f, term2R_reg);
00408       spe_zero(f, term2G_reg);
00409       spe_zero(f, term2B_reg);
00410       break;
00411    case PIPE_BLENDFACTOR_SRC_COLOR:
00412       spe_fm(f, term2R_reg, fbR_reg, fragR_reg);
00413       spe_fm(f, term2G_reg, fbG_reg, fragG_reg);
00414       spe_fm(f, term2B_reg, fbB_reg, fragB_reg);
00415       break;
00416    case PIPE_BLENDFACTOR_SRC_ALPHA:
00417       spe_fm(f, term2R_reg, fbR_reg, fragA_reg);
00418       spe_fm(f, term2G_reg, fbG_reg, fragA_reg);
00419       spe_fm(f, term2B_reg, fbB_reg, fragA_reg);
00420       break;
00421    case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
00422       /* one = {1.0, 1.0, 1.0, 1.0} */
00423       if (!one_reg_set) {
00424          spe_load_float(f, one_reg, 1.0f);
00425          one_reg_set = true;
00426       }
00427       /* tmp = one - fragA */
00428       spe_fs(f, tmp_reg, one_reg, fragA_reg);
00429       /* term = fb * tmp */
00430       spe_fm(f, term2R_reg, fbR_reg, tmp_reg);
00431       spe_fm(f, term2G_reg, fbG_reg, tmp_reg);
00432       spe_fm(f, term2B_reg, fbB_reg, tmp_reg);
00433       break;
00434       /* XXX more cases */
00435    default:
00436       ASSERT(0);
00437    }
00438 
00439    /*
00440     * Compute Dest Alpha term
00441     */
00442    switch (blend->alpha_dst_factor) {
00443    case PIPE_BLENDFACTOR_ONE:
00444       spe_move(f, term2A_reg, fbA_reg);
00445       break;
00446    case PIPE_BLENDFACTOR_ZERO:
00447       spe_zero(f, term2A_reg);
00448       break;
00449    case PIPE_BLENDFACTOR_SRC_ALPHA:
00450       spe_fm(f, term2A_reg, fbA_reg, fragA_reg);
00451       break;
00452    case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
00453       /* one = {1.0, 1.0, 1.0, 1.0} */
00454       if (!one_reg_set) {
00455          spe_load_float(f, one_reg, 1.0f);
00456          one_reg_set = true;
00457       }
00458       /* tmp = one - fragA */
00459       spe_fs(f, tmp_reg, one_reg, fragA_reg);
00460       /* termA = fbA * tmp */
00461       spe_fm(f, term2A_reg, fbA_reg, tmp_reg);
00462       break;
00463       /* XXX more cases */
00464    default:
00465       ASSERT(0);
00466    }
00467 
00468    /*
00469     * Combine Src/Dest RGB terms
00470     */
00471    switch (blend->rgb_func) {
00472    case PIPE_BLEND_ADD:
00473       spe_fa(f, fragR_reg, term1R_reg, term2R_reg);
00474       spe_fa(f, fragG_reg, term1G_reg, term2G_reg);
00475       spe_fa(f, fragB_reg, term1B_reg, term2B_reg);
00476       break;
00477    case PIPE_BLEND_SUBTRACT:
00478       spe_fs(f, fragR_reg, term1R_reg, term2R_reg);
00479       spe_fs(f, fragG_reg, term1G_reg, term2G_reg);
00480       spe_fs(f, fragB_reg, term1B_reg, term2B_reg);
00481       break;
00482       /* XXX more cases */
00483    default:
00484       ASSERT(0);
00485    }
00486 
00487    /*
00488     * Combine Src/Dest A term
00489     */
00490    switch (blend->alpha_func) {
00491    case PIPE_BLEND_ADD:
00492       spe_fa(f, fragA_reg, term1A_reg, term2A_reg);
00493       break;
00494    case PIPE_BLEND_SUBTRACT:
00495       spe_fs(f, fragA_reg, term1A_reg, term2A_reg);
00496       break;
00497       /* XXX more cases */
00498    default:
00499       ASSERT(0);
00500    }
00501 
00502    spe_release_register(f, term1R_reg);
00503    spe_release_register(f, term1G_reg);
00504    spe_release_register(f, term1B_reg);
00505    spe_release_register(f, term1A_reg);
00506 
00507    spe_release_register(f, term2R_reg);
00508    spe_release_register(f, term2G_reg);
00509    spe_release_register(f, term2B_reg);
00510    spe_release_register(f, term2A_reg);
00511 
00512    spe_release_register(f, fbR_reg);
00513    spe_release_register(f, fbG_reg);
00514    spe_release_register(f, fbB_reg);
00515    spe_release_register(f, fbA_reg);
00516 
00517    spe_release_register(f, one_reg);
00518    spe_release_register(f, tmp_reg);
00519 }

static void gen_colormask ( uint  colormask,
struct spe_function *f,
int  fragRGBA_reg,
int  fbRGBA_reg 
) [static]

Definition at line 533 of file cell_gen_fragment.c.

00536 {
00537    /* XXX to-do: not implemented yet, so no code is emitted and the
          * colormask argument currently has no effect.
          */
00538    /* operate on 32-bit packed pixels, not float colors */
00539 }

static void gen_depth_test ( const struct pipe_depth_stencil_alpha_state *dsa,
struct spe_function *f,
int  mask_reg,
int  ifragZ_reg,
int  ifbZ_reg,
int  zmask_reg 
) [static]

Generate SPE code to perform Z/depth testing.

Parameters:
dsa Gallium depth/stencil/alpha state to gen code for
f SPE function to append instruction onto.
mask_reg register containing quad/pixel "alive" mask (in/out)
ifragZ_reg register containing integer fragment Z values (in)
ifbZ_reg register containing integer frame buffer Z values (in/out)
zmask_reg register containing result of Z test/comparison (out)

Definition at line 59 of file cell_gen_fragment.c.

References ASSERT, pipe_depth_stencil_alpha_state::depth, pipe_depth_state::enabled, pipe_depth_state::func, PIPE_FUNC_ALWAYS, PIPE_FUNC_EQUAL, PIPE_FUNC_GEQUAL, PIPE_FUNC_GREATER, PIPE_FUNC_LEQUAL, PIPE_FUNC_LESS, PIPE_FUNC_NEVER, PIPE_FUNC_NOTEQUAL, spe_and(), spe_andc(), spe_ceq(), spe_cgt(), spe_il(), spe_move(), spe_selb(), and pipe_depth_state::writemask.

00062 {
00063    ASSERT(dsa->depth.enabled);
00064 
         /* Emit the comparison between the fragment Z values (ifragZ) and
          * the framebuffer Z values (ifbZ).  Only "equal" and "greater than"
          * compares are emitted; the other functions are built by swapping
          * the operands and/or by AND-ing mask with the complement of zmask.
          */
00065    switch (dsa->depth.func) {
00066    case PIPE_FUNC_EQUAL:
00067       /* zmask = (ifragZ == ifbZ) */
00068       spe_ceq(f, zmask_reg, ifragZ_reg, ifbZ_reg);
00069       /* mask = (mask & zmask) */
00070       spe_and(f, mask_reg, mask_reg, zmask_reg);
00071       break;
00072 
00073    case PIPE_FUNC_NOTEQUAL:
00074       /* zmask = (ifragZ == ifbZ) */
00075       spe_ceq(f, zmask_reg, ifragZ_reg, ifbZ_reg);
00076       /* mask = (mask & ~zmask) */
00077       spe_andc(f, mask_reg, mask_reg, zmask_reg);
00078       break;
00079 
00080    case PIPE_FUNC_GREATER:
00081       /* zmask = (ifragZ > ifbZ) */
00082       spe_cgt(f, zmask_reg, ifragZ_reg, ifbZ_reg);
00083       /* mask = (mask & zmask) */
00084       spe_and(f, mask_reg, mask_reg, zmask_reg);
00085       break;
00086 
00087    case PIPE_FUNC_LESS:
00088       /* zmask = (ifbZ > ifragZ) */
00089       spe_cgt(f, zmask_reg, ifbZ_reg, ifragZ_reg);
00090       /* mask = (mask & zmask) */
00091       spe_and(f, mask_reg, mask_reg, zmask_reg);
00092       break;
00093 
00094    case PIPE_FUNC_LEQUAL:
00095       /* zmask = (ifragZ > ifbZ); "less or equal" is its complement */
00096       spe_cgt(f, zmask_reg, ifragZ_reg, ifbZ_reg);
00097       /* mask = (mask & ~zmask) */
00098       spe_andc(f, mask_reg, mask_reg, zmask_reg);
00099       break;
00100 
00101    case PIPE_FUNC_GEQUAL:
00102       /* zmask = (ifbZ > ifragZ); "greater or equal" is its complement */
00103       spe_cgt(f, zmask_reg, ifbZ_reg, ifragZ_reg);
00104       /* mask = (mask & ~zmask) */
00105       spe_andc(f, mask_reg, mask_reg, zmask_reg);
00106       break;
00107 
00108    case PIPE_FUNC_NEVER:
00109       spe_il(f, mask_reg, 0);  /* mask = {0,0,0,0} */
00110       spe_move(f, zmask_reg, mask_reg);  /* zmask = mask */
00111       break;
00112 
00113    case PIPE_FUNC_ALWAYS:
00114       /* mask unchanged */
00115       spe_il(f, zmask_reg, ~0);  /* zmask = {~0,~0,~0,~0} */
00116       break;
00117 
00118    default:
00119       ASSERT(0);
00120       break;
00121    }
00122 
00123    if (dsa->depth.writemask) {
00124       /*
00125        * If (ztest passed) {
00126        *    framebufferZ = fragmentZ;
00127        * }
00128        * OR,
00129        * framebufferZ = (ztest_passed ? fragmentZ : framebufferZ);
             *
             * The select is driven by mask_reg (the updated "alive" mask),
             * not by zmask_reg.
00130        */
00131       spe_selb(f, ifbZ_reg, ifbZ_reg, ifragZ_reg, mask_reg);
00132    }
00133 }

static void gen_logicop ( const struct pipe_blend_state *blend,
struct spe_function *f,
int  fragRGBA_reg,
int  fbRGBA_reg 
) [static]

Definition at line 523 of file cell_gen_fragment.c.

00526 {
00527    /* XXX to-do: not implemented yet, so no code is emitted and the
          * logic op in the blend state currently has no effect.
          */
00528    /* operate on 32-bit packed pixels, not float colors */
00529 }

static void gen_pack_colors ( struct spe_function *f,
enum pipe_format  color_format,
int  r_reg,
int  g_reg,
int  b_reg,
int  a_reg,
int  rgba_reg 
) [static]

Generate code to pack a quad of float colors into four 32-bit integers.

Parameters:
f SPE function to append instruction onto.
color_format the dest color packing format
r_reg register containing four red values (in/clobbered)
g_reg register containing four green values (in/clobbered)
b_reg register containing four blue values (in/clobbered)
a_reg register containing four alpha values (in/clobbered)
rgba_reg register to store the packed RGBA colors (out)

Definition at line 555 of file cell_gen_fragment.c.

References ASSERT, PIPE_FORMAT_A8R8G8B8_UNORM, PIPE_FORMAT_B8G8R8A8_UNORM, spe_cfltu(), spe_or(), spe_roti(), and spe_rotmi().

00559 {
00560    /* Convert float[4] in [0.0,1.0] to int[4] in [0,~0], with clamping */
00561    spe_cfltu(f, r_reg, r_reg, 32);
00562    spe_cfltu(f, g_reg, g_reg, 32);
00563    spe_cfltu(f, b_reg, b_reg, 32);
00564    spe_cfltu(f, a_reg, a_reg, 32);
00565 
00566    /* Shift the most significant bytes to the least significant positions.
00567     * I.e.: reg = reg >> 24
          * Each register then holds one 8-bit component in bits [7..0].
00568     */
00569    spe_rotmi(f, r_reg, r_reg, -24);
00570    spe_rotmi(f, g_reg, g_reg, -24);
00571    spe_rotmi(f, b_reg, b_reg, -24);
00572    spe_rotmi(f, a_reg, a_reg, -24);
00573 
00574    /* Shift the color bytes according to the surface format.
          * The component that is not moved stays in bits [7..0]: blue for
          * A8R8G8B8, alpha for B8G8R8A8.
          */
00575    if (color_format == PIPE_FORMAT_A8R8G8B8_UNORM) {
00576       spe_roti(f, g_reg, g_reg, 8);   /* green <<= 8 */
00577       spe_roti(f, r_reg, r_reg, 16);  /* red <<= 16 */
00578       spe_roti(f, a_reg, a_reg, 24);  /* alpha <<= 24 */
00579    }
00580    else if (color_format == PIPE_FORMAT_B8G8R8A8_UNORM) {
00581       spe_roti(f, r_reg, r_reg, 8);   /* red <<= 8 */
00582       spe_roti(f, g_reg, g_reg, 16);  /* green <<= 16 */
00583       spe_roti(f, b_reg, b_reg, 24);  /* blue <<= 24 */
00584    }
00585    else {
00586       ASSERT(0);
00587    }
00588 
00589    /* Merge red, green, blue, alpha registers to make packed RGBA colors.
00590     * Eg: after shifting according to color_format we might have:
00591     *     R = {0x00ff0000, 0x00110000, 0x00220000, 0x00330000}
00592     *     G = {0x0000ff00, 0x00004400, 0x00005500, 0x00006600}
00593     *     B = {0x000000ff, 0x00000077, 0x00000088, 0x00000099}
00594     *     A = {0xff000000, 0xaa000000, 0xbb000000, 0xcc000000}
00595     * OR-ing all those together gives us four packed colors:
00596     *  RGBA = {0xffffffff, 0xaa114477, 0xbb225588, 0xcc336699}
00597     */
00598    spe_or(f, rgba_reg, r_reg, g_reg);
00599    spe_or(f, rgba_reg, rgba_reg, b_reg);
00600    spe_or(f, rgba_reg, rgba_reg, a_reg);
00601 }


Generated on Tue Sep 29 06:25:30 2009 for Gallium3D by  doxygen 1.5.4