cell_gen_fragment.h File Reference

This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Functions

void cell_gen_fragment_function (struct cell_context *cell, struct spe_function *f)
 Generate SPE code to implement the fragment operations (alpha test, depth test, stencil test, blending, colormask, and final framebuffer write) as specified by the current context state.


Function Documentation

void cell_gen_fragment_function ( struct cell_context * cell,
struct spe_function * f 
)

Generate SPE code to implement the fragment operations (alpha test, depth test, stencil test, blending, colormask, and final framebuffer write) as specified by the current context state.

Logically, this code will be called after running the fragment shader. But under some circumstances we could run some of this code before the fragment shader to cull fragments/quads that are totally occluded/discarded.

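XXX the depth-test path currently only handles PIPE_FORMAT_S8Z24_UNORM and PIPE_FORMAT_X8Z24_UNORM z/stencil buffers; Z extraction for every other format (including PIPE_FORMAT_Z24S8_UNORM), as well as all stencil handling, still ends in ASSERT(0).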

See the spu_default_fragment_ops() function to see how the per-fragment operations would be done with ordinary C code. The code we generate here, though, has no branches, is SIMD, etc., and should be much faster.

Parameters:
cell the rendering context (in)
f the generated function (out)

Local variable fbRGBA_reg: framebuffer's RGBA colors for quad

Local variable fbZS_reg: framebuffer's combined z/stencil values for quad

Definition at line 627 of file cell_gen_fragment.c.

References pipe_depth_stencil_alpha_state::alpha, ASSERT, cell_blend_state::base, cell_depth_stencil_alpha_state::base, cell_context::blend, pipe_blend_state::blend_enable, pipe_framebuffer_state::cbufs, pipe_blend_state::colormask, pipe_depth_stencil_alpha_state::depth, cell_context::depth_stencil, pipe_stencil_state::enabled, pipe_depth_state::enabled, pipe_alpha_state::enabled, pipe_surface::format, cell_context::framebuffer, gen_alpha_test(), gen_blend(), gen_colormask(), gen_depth_test(), gen_logicop(), gen_pack_colors(), pipe_blend_state::logicop_enable, PIPE_FORMAT_S8_UNORM, PIPE_FORMAT_S8Z24_UNORM, PIPE_FORMAT_X8Z24_UNORM, PIPE_FORMAT_Z16_UNORM, PIPE_FORMAT_Z24S8_UNORM, PIPE_FORMAT_Z24X8_UNORM, spe_a(), spe_allocate_available_register(), spe_allocate_register(), spe_and(), spe_bi(), spe_cfltu(), spe_fm(), spe_fsmbi(), spe_init_func(), SPE_INST_SIZE, spe_load_float(), spe_lqx(), spe_or(), SPE_REG_RA, spe_release_register(), spe_rotmi(), spe_selb(), spe_shli(), spe_stqx(), SPU_MAX_FRAGMENT_OPS_INSTS, pipe_depth_stencil_alpha_state::stencil, TILE_SIZE, pipe_stencil_state::write_mask, pipe_depth_state::writemask, and pipe_framebuffer_state::zsbuf.

00628 {
00629    const struct pipe_depth_stencil_alpha_state *dsa =
00630       &cell->depth_stencil->base;
00631    const struct pipe_blend_state *blend = &cell->blend->base;
00632    const enum pipe_format color_format = cell->framebuffer.cbufs[0]->format;
00633 
00634    /* For SPE function calls: reg $3 = first param, $4 = second param, etc. */
00635    const int x_reg = 3;  /* uint */
00636    const int y_reg = 4;  /* uint */
00637    const int color_tile_reg = 5;  /* tile_t * */
00638    const int depth_tile_reg = 6;  /* tile_t * */
00639    const int fragZ_reg = 7;   /* vector float */
00640    const int fragR_reg = 8;   /* vector float */
00641    const int fragG_reg = 9;   /* vector float */
00642    const int fragB_reg = 10;  /* vector float */
00643    const int fragA_reg = 11;  /* vector float */
00644    const int mask_reg = 12;   /* vector uint */
00645 
00646    /* offset of quad from start of tile
00647     * XXX assuming 4-byte pixels for color AND Z/stencil!!!!
00648     */
00649    int quad_offset_reg;
00650 
00651    int fbRGBA_reg;  /* framebuffer's RGBA colors for quad */
00652    int fbZS_reg;    /* framebuffer's combined z/stencil values for quad */
00654    spe_init_func(f, SPU_MAX_FRAGMENT_OPS_INSTS * SPE_INST_SIZE);
00655    spe_allocate_register(f, x_reg);
00656    spe_allocate_register(f, y_reg);
00657    spe_allocate_register(f, color_tile_reg);
00658    spe_allocate_register(f, depth_tile_reg);
00659    spe_allocate_register(f, fragZ_reg);
00660    spe_allocate_register(f, fragR_reg);
00661    spe_allocate_register(f, fragG_reg);
00662    spe_allocate_register(f, fragB_reg);
00663    spe_allocate_register(f, fragA_reg);
00664    spe_allocate_register(f, mask_reg);
00665 
00666    quad_offset_reg = spe_allocate_available_register(f);
00667    fbRGBA_reg = spe_allocate_available_register(f);
00668    fbZS_reg = spe_allocate_available_register(f);
00669 
00670    /* compute offset of quad from start of tile, in bytes */
00671    {
00672       int x2_reg = spe_allocate_available_register(f);
00673       int y2_reg = spe_allocate_available_register(f);
00674       /* offset = ((y/2) * 16 + (x/2)) * 16: 16 quads per tile row, 16 bytes per quad */
00675       ASSERT(TILE_SIZE == 32);
00676 
00677       spe_rotmi(f, x2_reg, x_reg, -1);  /* x2 = x / 2 */
00678       spe_rotmi(f, y2_reg, y_reg, -1);  /* y2 = y / 2 */
00679       spe_shli(f, y2_reg, y2_reg, 4);   /* y2 *= 16 */
00680       spe_a(f, quad_offset_reg, y2_reg, x2_reg);  /* offset = y2 + x2 */
00681       spe_shli(f, quad_offset_reg, quad_offset_reg, 4);   /* offset *= 16 */
00682 
00683       spe_release_register(f, x2_reg);
00684       spe_release_register(f, y2_reg);
00685    }
00686 
00687 
00688    if (dsa->alpha.enabled) {
00689       gen_alpha_test(dsa, f, mask_reg, fragA_reg);
00690    }
00691 
00692    if (dsa->depth.enabled || dsa->stencil[0].enabled) {
00693       const enum pipe_format zs_format = cell->framebuffer.zsbuf->format;
00694       boolean write_depth_stencil;
00695 
00696       int fbZ_reg = spe_allocate_available_register(f); /* Z values */
00697       int fbS_reg = spe_allocate_available_register(f); /* Stencil values */
00698 
00699       /* fetch quad of depth/stencil values from tile at (x,y) */
00700       /* Load: fbZS_reg = memory[depth_tile_reg + quad_offset_reg] */
00701       spe_lqx(f, fbZS_reg, depth_tile_reg, quad_offset_reg);
00702 
00703       if (dsa->depth.enabled) {
00704          /* Extract Z bits from fbZS_reg into fbZ_reg */
00705          if (zs_format == PIPE_FORMAT_S8Z24_UNORM ||
00706              zs_format == PIPE_FORMAT_X8Z24_UNORM) {
00707             int z24_mask_reg = spe_allocate_available_register(f);
00708             spe_fsmbi(f, z24_mask_reg, 0x7777);  /* mask[0,1,2,3] = 0x00ffffff */
00709             spe_and(f, fbZ_reg, fbZS_reg, z24_mask_reg);  /* fbZ = fbZS & mask */
00710             spe_release_register(f, z24_mask_reg);
00711             /* OK, fbZ_reg has four 24-bit Z values now */
00712          }
00713          else {
00714             /* XXX handle other z/stencil formats */
00715             ASSERT(0);
00716          }
00717 
00718          /* Convert fragZ values from float[4] to uint[4] */
00719          if (zs_format == PIPE_FORMAT_S8Z24_UNORM ||
00720              zs_format == PIPE_FORMAT_X8Z24_UNORM ||
00721              zs_format == PIPE_FORMAT_Z24S8_UNORM ||
00722              zs_format == PIPE_FORMAT_Z24X8_UNORM) {
00723             /* 24-bit Z values */
00724             int scale_reg = spe_allocate_available_register(f);
00725 
00726             /* scale_reg[0,1,2,3] = float(2^24-1) */
00727             spe_load_float(f, scale_reg, (float) 0xffffff);
00728 
00729             /* XXX these two instructions might be combined */
00730             spe_fm(f, fragZ_reg, fragZ_reg, scale_reg); /* fragZ *= scale */
00731             spe_cfltu(f, fragZ_reg, fragZ_reg, 0);  /* fragZ = (uint) fragZ */
00732 
00733             spe_release_register(f, scale_reg);
00734          }
00735          else {
00736             /* XXX handle 16-bit Z format */
00737             ASSERT(0);
00738          }
00739       }
00740 
00741       if (dsa->stencil[0].enabled) {
00742          /* Extract Stencil bits from fbZS_reg into fbS_reg */
00743          if (zs_format == PIPE_FORMAT_S8Z24_UNORM ||
00744              zs_format == PIPE_FORMAT_X8Z24_UNORM) {
00745             /* XXX extract with a shift */
00746             ASSERT(0);
00747          }
00748          else if (zs_format == PIPE_FORMAT_Z24S8_UNORM ||
00749                   zs_format == PIPE_FORMAT_Z24X8_UNORM) {
00750             /* XXX extract with a mask */
00751             ASSERT(0);
00752          }
00753       }
00754       /* NOTE(review): nothing above writes fbS_reg (stencil extraction is still
00755        * a stub), yet the write-back below ORs it into fbZS_reg -- confirm. */
00756       if (dsa->stencil[0].enabled) {
00757          /* XXX this may involve depth testing too */
00758          // gen_stencil_test(dsa, f, ... );
00759          ASSERT(0);
00760       }
00761       else if (dsa->depth.enabled) {
00762          int zmask_reg = spe_allocate_available_register(f);
00763          gen_depth_test(dsa, f, mask_reg, fragZ_reg, fbZ_reg, zmask_reg);
00764          spe_release_register(f, zmask_reg);
00765       }
00766 
00767       /* do we need to write Z and/or Stencil back into framebuffer? */
00768       write_depth_stencil = (dsa->depth.writemask |
00769                              dsa->stencil[0].write_mask |
00770                              dsa->stencil[1].write_mask);
00771 
00772       if (write_depth_stencil) {
00773          /* Merge latest Z and Stencil values into fbZS_reg.
00774           * fbZ_reg has four Z vals in bits [23..0] or bits [15..0].
00775           * fbS_reg has four 8-bit stencil values in bits [7..0].
00776           */
00777          if (zs_format == PIPE_FORMAT_S8Z24_UNORM ||
00778              zs_format == PIPE_FORMAT_X8Z24_UNORM) {
00779             spe_shli(f, fbS_reg, fbS_reg, 24); /* fbS = fbS << 24 */
00780             spe_or(f, fbZS_reg, fbS_reg, fbZ_reg); /* fbZS = fbS | fbZ */
00781          }
00782          else if (zs_format == PIPE_FORMAT_Z24S8_UNORM ||
00783                   zs_format == PIPE_FORMAT_Z24X8_UNORM) {
00784             /* XXX to do */
00785             ASSERT(0);
00786          }
00787          else if (zs_format == PIPE_FORMAT_Z16_UNORM) {
00788             /* XXX to do */
00789             ASSERT(0);
00790          }
00791          else if (zs_format == PIPE_FORMAT_S8_UNORM) {
00792             /* XXX to do */
00793             ASSERT(0);
00794          }
00795          else {
00796             /* bad zs_format */
00797             ASSERT(0);
00798          }
00799 
00800          /* Store: memory[depth_tile_reg + quad_offset_reg] = fbZS */
00801          spe_stqx(f, fbZS_reg, depth_tile_reg, quad_offset_reg);
00802       }
00803 
00804       spe_release_register(f, fbZ_reg);
00805       spe_release_register(f, fbS_reg);
00806    }
00807 
00808 
00809    /* Get framebuffer quad/colors.  We'll need these for blending,
00810     * color masking, and to obey the quad/pixel mask.
00811     * Load: fbRGBA_reg = memory[color_tile + quad_offset]
00812     * Note: if mask={~0,~0,~0,~0} and we're not blending or colormasking
00813     * we could skip this load.
00814     */
00815    spe_lqx(f, fbRGBA_reg, color_tile_reg, quad_offset_reg);
00816 
00817 
00818    if (blend->blend_enable) {
00819       gen_blend(blend, f, color_format,
00820                 fragR_reg, fragG_reg, fragB_reg, fragA_reg, fbRGBA_reg);
00821    }
00822 
00823    /*
00824     * Write fragment colors to framebuffer/tile.
00825     * This involves converting the fragment colors from float[4] to the
00826     * tile's specific format and obeying the quad/pixel mask.
00827     */
00828    {
00829       int rgba_reg = spe_allocate_available_register(f);
00830 
00831       /* Pack four float colors as four 32-bit int colors */
00832       gen_pack_colors(f, color_format,
00833                       fragR_reg, fragG_reg, fragB_reg, fragA_reg,
00834                       rgba_reg);
00835 
00836       if (blend->logicop_enable) {
00837          gen_logicop(blend, f, rgba_reg, fbRGBA_reg);
00838       }
00839 
00840       if (blend->colormask != 0xf) {
00841          gen_colormask(blend->colormask, f, rgba_reg, fbRGBA_reg);
00842       }
00843 
00844 
00845       /* Mix fragment colors with framebuffer colors using the quad/pixel mask:
00846        * if (mask[i])
00847        *    rgba[i] = rgba[i];
00848        * else
00849        *    rgba[i] = framebuffer[i];
00850        */
00851       spe_selb(f, rgba_reg, fbRGBA_reg, rgba_reg, mask_reg);
00852 
00853       /* Store updated quad in tile:
00854        * memory[color_tile + quad_offset] = rgba_reg;
00855        */
00856       spe_stqx(f, rgba_reg, color_tile_reg, quad_offset_reg);
00857 
00858       spe_release_register(f, rgba_reg);
00859    }
00860 
00861    //printf("gen_fragment_ops nr instructions: %u\n", f->num_inst);
00862 
00863    spe_bi(f, SPE_REG_RA, 0, 0);  /* return from function call */
00864 
00865 
00866    spe_release_register(f, fbRGBA_reg);
00867    spe_release_register(f, fbZS_reg);
00868    spe_release_register(f, quad_offset_reg);
00869 }


Generated on Tue Sep 29 06:25:30 2009 for Gallium3D by  doxygen 1.5.4