Go to the source code of this file.
Defines | |
#define | OPTIMIZATIONS 1 |
Generate SPU per-fragment code (actually per-quad code). | |
Functions | |
static void | gen_depth_test (const struct pipe_depth_stencil_alpha_state *dsa, struct spe_function *f, int mask_reg, int ifragZ_reg, int ifbZ_reg, int zmask_reg) |
Generate SPE code to perform Z/depth testing. | |
static void | gen_alpha_test (const struct pipe_depth_stencil_alpha_state *dsa, struct spe_function *f, int mask_reg, int fragA_reg) |
Generate SPE code to perform alpha testing. | |
static void | gen_blend (const struct pipe_blend_state *blend, struct spe_function *f, enum pipe_format color_format, int fragR_reg, int fragG_reg, int fragB_reg, int fragA_reg, int fbRGBA_reg) |
Generate SPE code to implement the given blend mode for a quad of pixels. | |
static void | gen_logicop (const struct pipe_blend_state *blend, struct spe_function *f, int fragRGBA_reg, int fbRGBA_reg) |
static void | gen_colormask (uint colormask, struct spe_function *f, int fragRGBA_reg, int fbRGBA_reg) |
static void | gen_pack_colors (struct spe_function *f, enum pipe_format color_format, int r_reg, int g_reg, int b_reg, int a_reg, int rgba_reg) |
Generate code to pack a quad of float colors into four 32-bit integers. | |
void | cell_gen_fragment_function (struct cell_context *cell, struct spe_function *f) |
Generate SPE code to implement the fragment operations (alpha test, depth test, stencil test, blending, colormask, and final framebuffer write) as specified by the current context state. |
#define OPTIMIZATIONS 1 |
Generate SPU per-fragment code (actually per-quad code).
Definition at line 45 of file cell_gen_fragment.c.
void cell_gen_fragment_function | ( | struct cell_context * | cell, | |
struct spe_function * | f | |||
) |
Generate SPE code to implement the fragment operations (alpha test, depth test, stencil test, blending, colormask, and final framebuffer write) as specified by the current context state.
Logically, this code will be called after running the fragment shader. But under some circumstances we could run some of this code before the fragment shader to cull fragments/quads that are totally occluded/discarded.
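XXX only the PIPE_FORMAT_S8Z24_UNORM and PIPE_FORMAT_X8Z24_UNORM z/stencil buffer formats are handled right now; the depth path asserts on any other format, and stencil testing is not implemented yet.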
See the spu_default_fragment_ops() function to see how the per-fragment operations would be done with ordinary C code. The code we generate here though has no branches, is SIMD, etc and should be much faster.
cell | the rendering context (in) | |
f | the generated function (out) |
< framebuffer's RGBA colors for quad
< framebuffer's combined z/stencil values for quad
Definition at line 627 of file cell_gen_fragment.c.
References pipe_depth_stencil_alpha_state::alpha, ASSERT, cell_blend_state::base, cell_depth_stencil_alpha_state::base, cell_context::blend, pipe_blend_state::blend_enable, pipe_framebuffer_state::cbufs, pipe_blend_state::colormask, pipe_depth_stencil_alpha_state::depth, cell_context::depth_stencil, pipe_stencil_state::enabled, pipe_depth_state::enabled, pipe_alpha_state::enabled, pipe_surface::format, cell_context::framebuffer, gen_alpha_test(), gen_blend(), gen_colormask(), gen_depth_test(), gen_logicop(), gen_pack_colors(), pipe_blend_state::logicop_enable, PIPE_FORMAT_S8_UNORM, PIPE_FORMAT_S8Z24_UNORM, PIPE_FORMAT_X8Z24_UNORM, PIPE_FORMAT_Z16_UNORM, PIPE_FORMAT_Z24S8_UNORM, PIPE_FORMAT_Z24X8_UNORM, spe_a(), spe_allocate_available_register(), spe_allocate_register(), spe_and(), spe_bi(), spe_cfltu(), spe_fm(), spe_fsmbi(), spe_init_func(), SPE_INST_SIZE, spe_load_float(), spe_lqx(), spe_or(), SPE_REG_RA, spe_release_register(), spe_rotmi(), spe_selb(), spe_shli(), spe_stqx(), SPU_MAX_FRAGMENT_OPS_INSTS, pipe_depth_stencil_alpha_state::stencil, TILE_SIZE, pipe_stencil_state::write_mask, pipe_depth_state::writemask, and pipe_framebuffer_state::zsbuf.
00628 { 00629 const struct pipe_depth_stencil_alpha_state *dsa = 00630 &cell->depth_stencil->base; 00631 const struct pipe_blend_state *blend = &cell->blend->base; 00632 const enum pipe_format color_format = cell->framebuffer.cbufs[0]->format; 00633 00634 /* For SPE function calls: reg $3 = first param, $4 = second param, etc. */ 00635 const int x_reg = 3; /* uint */ 00636 const int y_reg = 4; /* uint */ 00637 const int color_tile_reg = 5; /* tile_t * */ 00638 const int depth_tile_reg = 6; /* tile_t * */ 00639 const int fragZ_reg = 7; /* vector float */ 00640 const int fragR_reg = 8; /* vector float */ 00641 const int fragG_reg = 9; /* vector float */ 00642 const int fragB_reg = 10; /* vector float */ 00643 const int fragA_reg = 11; /* vector float */ 00644 const int mask_reg = 12; /* vector uint */ 00645 00646 /* offset of quad from start of tile 00647 * XXX assuming 4-byte pixels for color AND Z/stencil!!!! 00648 */ 00649 int quad_offset_reg; 00650 00651 int fbRGBA_reg; 00652 int fbZS_reg; 00654 spe_init_func(f, SPU_MAX_FRAGMENT_OPS_INSTS * SPE_INST_SIZE); 00655 spe_allocate_register(f, x_reg); 00656 spe_allocate_register(f, y_reg); 00657 spe_allocate_register(f, color_tile_reg); 00658 spe_allocate_register(f, depth_tile_reg); 00659 spe_allocate_register(f, fragZ_reg); 00660 spe_allocate_register(f, fragR_reg); 00661 spe_allocate_register(f, fragG_reg); 00662 spe_allocate_register(f, fragB_reg); 00663 spe_allocate_register(f, fragA_reg); 00664 spe_allocate_register(f, mask_reg); 00665 00666 quad_offset_reg = spe_allocate_available_register(f); 00667 fbRGBA_reg = spe_allocate_available_register(f); 00668 fbZS_reg = spe_allocate_available_register(f); 00669 00670 /* compute offset of quad from start of tile, in bytes */ 00671 { 00672 int x2_reg = spe_allocate_available_register(f); 00673 int y2_reg = spe_allocate_available_register(f); 00674 00675 ASSERT(TILE_SIZE == 32); 00676 00677 spe_rotmi(f, x2_reg, x_reg, -1); /* x2 = x / 2 */ 00678 spe_rotmi(f, y2_reg, 
y_reg, -1); /* y2 = y / 2 */ 00679 spe_shli(f, y2_reg, y2_reg, 4); /* y2 *= 16 */ 00680 spe_a(f, quad_offset_reg, y2_reg, x2_reg); /* offset = y2 + x2 */ 00681 spe_shli(f, quad_offset_reg, quad_offset_reg, 4); /* offset *= 16 */ 00682 00683 spe_release_register(f, x2_reg); 00684 spe_release_register(f, y2_reg); 00685 } 00686 00687 00688 if (dsa->alpha.enabled) { 00689 gen_alpha_test(dsa, f, mask_reg, fragA_reg); 00690 } 00691 00692 if (dsa->depth.enabled || dsa->stencil[0].enabled) { 00693 const enum pipe_format zs_format = cell->framebuffer.zsbuf->format; 00694 boolean write_depth_stencil; 00695 00696 int fbZ_reg = spe_allocate_available_register(f); /* Z values */ 00697 int fbS_reg = spe_allocate_available_register(f); /* Stencil values */ 00698 00699 /* fetch quad of depth/stencil values from tile at (x,y) */ 00700 /* Load: fbZS_reg = memory[depth_tile_reg + offset_reg] */ 00701 spe_lqx(f, fbZS_reg, depth_tile_reg, quad_offset_reg); 00702 00703 if (dsa->depth.enabled) { 00704 /* Extract Z bits from fbZS_reg into fbZ_reg */ 00705 if (zs_format == PIPE_FORMAT_S8Z24_UNORM || 00706 zs_format == PIPE_FORMAT_X8Z24_UNORM) { 00707 int mask_reg = spe_allocate_available_register(f); 00708 spe_fsmbi(f, mask_reg, 0x7777); /* mask[0,1,2,3] = 0x00ffffff */ 00709 spe_and(f, fbZ_reg, fbZS_reg, mask_reg); /* fbZ = fbZS & mask */ 00710 spe_release_register(f, mask_reg); 00711 /* OK, fbZ_reg has four 24-bit Z values now */ 00712 } 00713 else { 00714 /* XXX handle other z/stencil formats */ 00715 ASSERT(0); 00716 } 00717 00718 /* Convert fragZ values from float[4] to uint[4] */ 00719 if (zs_format == PIPE_FORMAT_S8Z24_UNORM || 00720 zs_format == PIPE_FORMAT_X8Z24_UNORM || 00721 zs_format == PIPE_FORMAT_Z24S8_UNORM || 00722 zs_format == PIPE_FORMAT_Z24X8_UNORM) { 00723 /* 24-bit Z values */ 00724 int scale_reg = spe_allocate_available_register(f); 00725 00726 /* scale_reg[0,1,2,3] = float(2^24-1) */ 00727 spe_load_float(f, scale_reg, (float) 0xffffff); 00728 00729 /* XXX these two 
instructions might be combined */ 00730 spe_fm(f, fragZ_reg, fragZ_reg, scale_reg); /* fragZ *= scale */ 00731 spe_cfltu(f, fragZ_reg, fragZ_reg, 0); /* fragZ = (int) fragZ */ 00732 00733 spe_release_register(f, scale_reg); 00734 } 00735 else { 00736 /* XXX handle 16-bit Z format */ 00737 ASSERT(0); 00738 } 00739 } 00740 00741 if (dsa->stencil[0].enabled) { 00742 /* Extract Stencil bit sfrom fbZS_reg into fbS_reg */ 00743 if (zs_format == PIPE_FORMAT_S8Z24_UNORM || 00744 zs_format == PIPE_FORMAT_X8Z24_UNORM) { 00745 /* XXX extract with a shift */ 00746 ASSERT(0); 00747 } 00748 else if (zs_format == PIPE_FORMAT_Z24S8_UNORM || 00749 zs_format == PIPE_FORMAT_Z24X8_UNORM) { 00750 /* XXX extract with a mask */ 00751 ASSERT(0); 00752 } 00753 } 00754 00755 00756 if (dsa->stencil[0].enabled) { 00757 /* XXX this may involve depth testing too */ 00758 // gen_stencil_test(dsa, f, ... ); 00759 ASSERT(0); 00760 } 00761 else if (dsa->depth.enabled) { 00762 int zmask_reg = spe_allocate_available_register(f); 00763 gen_depth_test(dsa, f, mask_reg, fragZ_reg, fbZ_reg, zmask_reg); 00764 spe_release_register(f, zmask_reg); 00765 } 00766 00767 /* do we need to write Z and/or Stencil back into framebuffer? */ 00768 write_depth_stencil = (dsa->depth.writemask | 00769 dsa->stencil[0].write_mask | 00770 dsa->stencil[1].write_mask); 00771 00772 if (write_depth_stencil) { 00773 /* Merge latest Z and Stencil values into fbZS_reg. 00774 * fbZ_reg has four Z vals in bits [23..0] or bits [15..0]. 00775 * fbS_reg has four 8-bit Z values in bits [7..0]. 
00776 */ 00777 if (zs_format == PIPE_FORMAT_S8Z24_UNORM || 00778 zs_format == PIPE_FORMAT_X8Z24_UNORM) { 00779 spe_shli(f, fbS_reg, fbS_reg, 24); /* fbS = fbS << 24 */ 00780 spe_or(f, fbZS_reg, fbS_reg, fbZ_reg); /* fbZS = fbS | fbZ */ 00781 } 00782 else if (zs_format == PIPE_FORMAT_S8Z24_UNORM || 00783 zs_format == PIPE_FORMAT_X8Z24_UNORM) { 00784 /* XXX to do */ 00785 ASSERT(0); 00786 } 00787 else if (zs_format == PIPE_FORMAT_Z16_UNORM) { 00788 /* XXX to do */ 00789 ASSERT(0); 00790 } 00791 else if (zs_format == PIPE_FORMAT_S8_UNORM) { 00792 /* XXX to do */ 00793 ASSERT(0); 00794 } 00795 else { 00796 /* bad zs_format */ 00797 ASSERT(0); 00798 } 00799 00800 /* Store: memory[depth_tile_reg + quad_offset_reg] = fbZS */ 00801 spe_stqx(f, fbZS_reg, depth_tile_reg, quad_offset_reg); 00802 } 00803 00804 spe_release_register(f, fbZ_reg); 00805 spe_release_register(f, fbS_reg); 00806 } 00807 00808 00809 /* Get framebuffer quad/colors. We'll need these for blending, 00810 * color masking, and to obey the quad/pixel mask. 00811 * Load: fbRGBA_reg = memory[color_tile + quad_offset] 00812 * Note: if mask={~0,~0,~0,~0} and we're not blending or colormasking 00813 * we could skip this load. 00814 */ 00815 spe_lqx(f, fbRGBA_reg, color_tile_reg, quad_offset_reg); 00816 00817 00818 if (blend->blend_enable) { 00819 gen_blend(blend, f, color_format, 00820 fragR_reg, fragG_reg, fragB_reg, fragA_reg, fbRGBA_reg); 00821 } 00822 00823 /* 00824 * Write fragment colors to framebuffer/tile. 00825 * This involves converting the fragment colors from float[4] to the 00826 * tile's specific format and obeying the quad/pixel mask. 
00827 */ 00828 { 00829 int rgba_reg = spe_allocate_available_register(f); 00830 00831 /* Pack four float colors as four 32-bit int colors */ 00832 gen_pack_colors(f, color_format, 00833 fragR_reg, fragG_reg, fragB_reg, fragA_reg, 00834 rgba_reg); 00835 00836 if (blend->logicop_enable) { 00837 gen_logicop(blend, f, rgba_reg, fbRGBA_reg); 00838 } 00839 00840 if (blend->colormask != 0xf) { 00841 gen_colormask(blend->colormask, f, rgba_reg, fbRGBA_reg); 00842 } 00843 00844 00845 /* Mix fragment colors with framebuffer colors using the quad/pixel mask: 00846 * if (mask[i]) 00847 * rgba[i] = rgba[i]; 00848 * else 00849 * rgba[i] = framebuffer[i]; 00850 */ 00851 spe_selb(f, rgba_reg, fbRGBA_reg, rgba_reg, mask_reg); 00852 00853 /* Store updated quad in tile: 00854 * memory[color_tile + quad_offset] = rgba_reg; 00855 */ 00856 spe_stqx(f, rgba_reg, color_tile_reg, quad_offset_reg); 00857 00858 spe_release_register(f, rgba_reg); 00859 } 00860 00861 //printf("gen_fragment_ops nr instructions: %u\n", f->num_inst); 00862 00863 spe_bi(f, SPE_REG_RA, 0, 0); /* return from function call */ 00864 00865 00866 spe_release_register(f, fbRGBA_reg); 00867 spe_release_register(f, fbZS_reg); 00868 spe_release_register(f, quad_offset_reg); 00869 }
static void gen_alpha_test | ( | const struct pipe_depth_stencil_alpha_state * | dsa, | |
struct spe_function * | f, | |||
int | mask_reg, | |||
int | fragA_reg | |||
) | [static] |
Generate SPE code to perform alpha testing.
dsa | Gallium depth/stencil/alpha state to gen code for | |
f | SPE function to append instruction onto. | |
mask_reg | register containing quad/pixel "alive" mask (in/out) | |
fragA_reg | register containing four fragment alpha values (in) |
Definition at line 145 of file cell_gen_fragment.c.
References pipe_depth_stencil_alpha_state::alpha, ASSERT, pipe_alpha_state::enabled, pipe_alpha_state::func, PIPE_FUNC_ALWAYS, PIPE_FUNC_EQUAL, PIPE_FUNC_GEQUAL, PIPE_FUNC_GREATER, PIPE_FUNC_LEQUAL, PIPE_FUNC_LESS, PIPE_FUNC_NEVER, PIPE_FUNC_NOTEQUAL, pipe_alpha_state::ref, spe_allocate_available_register(), spe_and(), spe_andc(), spe_biz(), spe_fceq(), spe_fcgt(), spe_il(), spe_load_float(), spe_orx(), SPE_REG_RA, and spe_release_register().
00147 { 00148 int ref_reg = spe_allocate_available_register(f); 00149 int amask_reg = spe_allocate_available_register(f); 00150 00151 ASSERT(dsa->alpha.enabled); 00152 00153 if ((dsa->alpha.func != PIPE_FUNC_NEVER) && 00154 (dsa->alpha.func != PIPE_FUNC_ALWAYS)) { 00155 /* load/splat the alpha reference float value */ 00156 spe_load_float(f, ref_reg, dsa->alpha.ref); 00157 } 00158 00159 /* emit code to do the alpha comparison, updating 'mask' */ 00160 switch (dsa->alpha.func) { 00161 case PIPE_FUNC_EQUAL: 00162 /* amask = (fragA == ref) */ 00163 spe_fceq(f, amask_reg, fragA_reg, ref_reg); 00164 /* mask = (mask & amask) */ 00165 spe_and(f, mask_reg, mask_reg, amask_reg); 00166 break; 00167 00168 case PIPE_FUNC_NOTEQUAL: 00169 /* amask = (fragA == ref) */ 00170 spe_fceq(f, amask_reg, fragA_reg, ref_reg); 00171 /* mask = (mask & ~amask) */ 00172 spe_andc(f, mask_reg, mask_reg, amask_reg); 00173 break; 00174 00175 case PIPE_FUNC_GREATER: 00176 /* amask = (fragA > ref) */ 00177 spe_fcgt(f, amask_reg, fragA_reg, ref_reg); 00178 /* mask = (mask & amask) */ 00179 spe_and(f, mask_reg, mask_reg, amask_reg); 00180 break; 00181 00182 case PIPE_FUNC_LESS: 00183 /* amask = (ref > fragA) */ 00184 spe_fcgt(f, amask_reg, ref_reg, fragA_reg); 00185 /* mask = (mask & amask) */ 00186 spe_and(f, mask_reg, mask_reg, amask_reg); 00187 break; 00188 00189 case PIPE_FUNC_LEQUAL: 00190 /* amask = (fragA > ref) */ 00191 spe_fcgt(f, amask_reg, fragA_reg, ref_reg); 00192 /* mask = (mask & ~amask) */ 00193 spe_andc(f, mask_reg, mask_reg, amask_reg); 00194 break; 00195 00196 case PIPE_FUNC_GEQUAL: 00197 /* amask = (ref > fragA) */ 00198 spe_fcgt(f, amask_reg, ref_reg, fragA_reg); 00199 /* mask = (mask & ~amask) */ 00200 spe_andc(f, mask_reg, mask_reg, amask_reg); 00201 break; 00202 00203 case PIPE_FUNC_NEVER: 00204 spe_il(f, mask_reg, 0); /* mask = [0,0,0,0] */ 00205 break; 00206 00207 case PIPE_FUNC_ALWAYS: 00208 /* no-op, mask unchanged */ 00209 break; 00210 00211 default: 00212 ASSERT(0); 
00213 break; 00214 } 00215 00216 #if OPTIMIZATIONS 00217 /* if mask == {0,0,0,0} we're all done, return */ 00218 { 00219 /* re-use amask reg here */ 00220 int tmp_reg = amask_reg; 00221 /* tmp[0] = (mask[0] | mask[1] | mask[2] | mask[3]) */ 00222 spe_orx(f, tmp_reg, mask_reg); 00223 /* if tmp[0] == 0 then return from function call */ 00224 spe_biz(f, tmp_reg, SPE_REG_RA, 0, 0); 00225 } 00226 #endif 00227 00228 spe_release_register(f, ref_reg); 00229 spe_release_register(f, amask_reg); 00230 }
static void gen_blend | ( | const struct pipe_blend_state * | blend, | |
struct spe_function * | f, | |||
enum pipe_format | color_format, | |||
int | fragR_reg, | |||
int | fragG_reg, | |||
int | fragB_reg, | |||
int | fragA_reg, | |||
int | fbRGBA_reg | |||
) | [static] |
Generate SPE code to implement the given blend mode for a quad of pixels.
f | SPE function to append instruction onto. | |
fragR_reg | register with fragment red values (float) (in/out) | |
fragG_reg | register with fragment green values (float) (in/out) | |
fragB_reg | register with fragment blue values (float) (in/out) | |
fragA_reg | register with fragment alpha values (float) (in/out) | |
fbRGBA_reg | register with packed framebuffer colors (integer) (in) |
Definition at line 244 of file cell_gen_fragment.c.
References pipe_blend_state::alpha_dst_factor, pipe_blend_state::alpha_func, pipe_blend_state::alpha_src_factor, ASSERT, pipe_blend_state::blend_enable, codegen::one_reg, PIPE_BLEND_ADD, PIPE_BLEND_SUBTRACT, PIPE_BLENDFACTOR_INV_SRC_ALPHA, PIPE_BLENDFACTOR_ONE, PIPE_BLENDFACTOR_SRC_ALPHA, PIPE_BLENDFACTOR_SRC_COLOR, PIPE_BLENDFACTOR_ZERO, PIPE_FORMAT_A8R8G8B8_UNORM, PIPE_FORMAT_B8G8R8A8_UNORM, pipe_blend_state::rgb_dst_factor, pipe_blend_state::rgb_func, pipe_blend_state::rgb_src_factor, spe_allocate_available_register(), spe_and(), spe_cuflt(), spe_fa(), spe_fm(), spe_fs(), spe_load_float(), spe_load_int(), spe_move(), spe_release_register(), spe_roti(), and spe_zero().
00249 { 00250 int term1R_reg = spe_allocate_available_register(f); 00251 int term1G_reg = spe_allocate_available_register(f); 00252 int term1B_reg = spe_allocate_available_register(f); 00253 int term1A_reg = spe_allocate_available_register(f); 00254 00255 int term2R_reg = spe_allocate_available_register(f); 00256 int term2G_reg = spe_allocate_available_register(f); 00257 int term2B_reg = spe_allocate_available_register(f); 00258 int term2A_reg = spe_allocate_available_register(f); 00259 00260 int fbR_reg = spe_allocate_available_register(f); 00261 int fbG_reg = spe_allocate_available_register(f); 00262 int fbB_reg = spe_allocate_available_register(f); 00263 int fbA_reg = spe_allocate_available_register(f); 00264 00265 int one_reg = spe_allocate_available_register(f); 00266 int tmp_reg = spe_allocate_available_register(f); 00267 00268 boolean one_reg_set = false; /* avoid setting one_reg more than once */ 00269 00270 ASSERT(blend->blend_enable); 00271 00272 /* Unpack/convert framebuffer colors from four 32-bit packed colors 00273 * (fbRGBA) to four float RGBA vectors (fbR, fbG, fbB, fbA). 00274 * Each 8-bit color component is expanded into a float in [0.0, 1.0]. 
00275 */ 00276 { 00277 int mask_reg = spe_allocate_available_register(f); 00278 00279 /* mask = {0x000000ff, 0x000000ff, 0x000000ff, 0x000000ff} */ 00280 spe_load_int(f, mask_reg, 0xff); 00281 00282 /* XXX there may be more clever ways to implement the following code */ 00283 switch (color_format) { 00284 case PIPE_FORMAT_A8R8G8B8_UNORM: 00285 /* fbB = fbB & mask */ 00286 spe_and(f, fbB_reg, fbRGBA_reg, mask_reg); 00287 /* mask = mask << 8 */ 00288 spe_roti(f, mask_reg, mask_reg, 8); 00289 00290 /* fbG = fbRGBA & mask */ 00291 spe_and(f, fbG_reg, fbRGBA_reg, mask_reg); 00292 /* fbG = fbG >> 8 */ 00293 spe_roti(f, fbG_reg, fbG_reg, -8); 00294 /* mask = mask << 8 */ 00295 spe_roti(f, mask_reg, mask_reg, 8); 00296 00297 /* fbR = fbRGBA & mask */ 00298 spe_and(f, fbR_reg, fbRGBA_reg, mask_reg); 00299 /* fbR = fbR >> 16 */ 00300 spe_roti(f, fbR_reg, fbR_reg, -16); 00301 /* mask = mask << 8 */ 00302 spe_roti(f, mask_reg, mask_reg, 8); 00303 00304 /* fbA = fbRGBA & mask */ 00305 spe_and(f, fbA_reg, fbRGBA_reg, mask_reg); 00306 /* fbA = fbA >> 24 */ 00307 spe_roti(f, fbA_reg, fbA_reg, -24); 00308 break; 00309 00310 case PIPE_FORMAT_B8G8R8A8_UNORM: 00311 /* fbA = fbA & mask */ 00312 spe_and(f, fbA_reg, fbRGBA_reg, mask_reg); 00313 /* mask = mask << 8 */ 00314 spe_roti(f, mask_reg, mask_reg, 8); 00315 00316 /* fbR = fbRGBA & mask */ 00317 spe_and(f, fbR_reg, fbRGBA_reg, mask_reg); 00318 /* fbR = fbR >> 8 */ 00319 spe_roti(f, fbR_reg, fbR_reg, -8); 00320 /* mask = mask << 8 */ 00321 spe_roti(f, mask_reg, mask_reg, 8); 00322 00323 /* fbG = fbRGBA & mask */ 00324 spe_and(f, fbG_reg, fbRGBA_reg, mask_reg); 00325 /* fbG = fbG >> 16 */ 00326 spe_roti(f, fbG_reg, fbG_reg, -16); 00327 /* mask = mask << 8 */ 00328 spe_roti(f, mask_reg, mask_reg, 8); 00329 00330 /* fbB = fbRGBA & mask */ 00331 spe_and(f, fbB_reg, fbRGBA_reg, mask_reg); 00332 /* fbB = fbB >> 24 */ 00333 spe_roti(f, fbB_reg, fbB_reg, -24); 00334 break; 00335 00336 default: 00337 ASSERT(0); 00338 } 00339 00340 /* convert 
int[4] in [0,255] to float[4] in [0.0, 1.0] */ 00341 spe_cuflt(f, fbR_reg, fbR_reg, 8); 00342 spe_cuflt(f, fbG_reg, fbG_reg, 8); 00343 spe_cuflt(f, fbB_reg, fbB_reg, 8); 00344 spe_cuflt(f, fbA_reg, fbA_reg, 8); 00345 00346 spe_release_register(f, mask_reg); 00347 } 00348 00349 00350 /* 00351 * Compute Src RGB terms 00352 */ 00353 switch (blend->rgb_src_factor) { 00354 case PIPE_BLENDFACTOR_ONE: 00355 spe_move(f, term1R_reg, fragR_reg); 00356 spe_move(f, term1G_reg, fragG_reg); 00357 spe_move(f, term1B_reg, fragB_reg); 00358 break; 00359 case PIPE_BLENDFACTOR_ZERO: 00360 spe_zero(f, term1R_reg); 00361 spe_zero(f, term1G_reg); 00362 spe_zero(f, term1B_reg); 00363 break; 00364 case PIPE_BLENDFACTOR_SRC_COLOR: 00365 spe_fm(f, term1R_reg, fragR_reg, fragR_reg); 00366 spe_fm(f, term1G_reg, fragG_reg, fragG_reg); 00367 spe_fm(f, term1B_reg, fragB_reg, fragB_reg); 00368 break; 00369 case PIPE_BLENDFACTOR_SRC_ALPHA: 00370 spe_fm(f, term1R_reg, fragR_reg, fragA_reg); 00371 spe_fm(f, term1G_reg, fragG_reg, fragA_reg); 00372 spe_fm(f, term1B_reg, fragB_reg, fragA_reg); 00373 break; 00374 /* XXX more cases */ 00375 default: 00376 ASSERT(0); 00377 } 00378 00379 /* 00380 * Compute Src Alpha term 00381 */ 00382 switch (blend->alpha_src_factor) { 00383 case PIPE_BLENDFACTOR_ONE: 00384 spe_move(f, term1A_reg, fragA_reg); 00385 break; 00386 case PIPE_BLENDFACTOR_SRC_COLOR: 00387 spe_fm(f, term1A_reg, fragA_reg, fragA_reg); 00388 break; 00389 case PIPE_BLENDFACTOR_SRC_ALPHA: 00390 spe_fm(f, term1A_reg, fragA_reg, fragA_reg); 00391 break; 00392 /* XXX more cases */ 00393 default: 00394 ASSERT(0); 00395 } 00396 00397 /* 00398 * Compute Dest RGB terms 00399 */ 00400 switch (blend->rgb_dst_factor) { 00401 case PIPE_BLENDFACTOR_ONE: 00402 spe_move(f, term2R_reg, fbR_reg); 00403 spe_move(f, term2G_reg, fbG_reg); 00404 spe_move(f, term2B_reg, fbB_reg); 00405 break; 00406 case PIPE_BLENDFACTOR_ZERO: 00407 spe_zero(f, term2R_reg); 00408 spe_zero(f, term2G_reg); 00409 spe_zero(f, term2B_reg); 
00410 break; 00411 case PIPE_BLENDFACTOR_SRC_COLOR: 00412 spe_fm(f, term2R_reg, fbR_reg, fragR_reg); 00413 spe_fm(f, term2G_reg, fbG_reg, fragG_reg); 00414 spe_fm(f, term2B_reg, fbB_reg, fragB_reg); 00415 break; 00416 case PIPE_BLENDFACTOR_SRC_ALPHA: 00417 spe_fm(f, term2R_reg, fbR_reg, fragA_reg); 00418 spe_fm(f, term2G_reg, fbG_reg, fragA_reg); 00419 spe_fm(f, term2B_reg, fbB_reg, fragA_reg); 00420 break; 00421 case PIPE_BLENDFACTOR_INV_SRC_ALPHA: 00422 /* one = {1.0, 1.0, 1.0, 1.0} */ 00423 if (!one_reg_set) { 00424 spe_load_float(f, one_reg, 1.0f); 00425 one_reg_set = true; 00426 } 00427 /* tmp = one - fragA */ 00428 spe_fs(f, tmp_reg, one_reg, fragA_reg); 00429 /* term = fb * tmp */ 00430 spe_fm(f, term2R_reg, fbR_reg, tmp_reg); 00431 spe_fm(f, term2G_reg, fbG_reg, tmp_reg); 00432 spe_fm(f, term2B_reg, fbB_reg, tmp_reg); 00433 break; 00434 /* XXX more cases */ 00435 default: 00436 ASSERT(0); 00437 } 00438 00439 /* 00440 * Compute Dest Alpha term 00441 */ 00442 switch (blend->alpha_dst_factor) { 00443 case PIPE_BLENDFACTOR_ONE: 00444 spe_move(f, term2A_reg, fbA_reg); 00445 break; 00446 case PIPE_BLENDFACTOR_ZERO: 00447 spe_zero(f, term2A_reg); 00448 break; 00449 case PIPE_BLENDFACTOR_SRC_ALPHA: 00450 spe_fm(f, term2A_reg, fbA_reg, fragA_reg); 00451 break; 00452 case PIPE_BLENDFACTOR_INV_SRC_ALPHA: 00453 /* one = {1.0, 1.0, 1.0, 1.0} */ 00454 if (!one_reg_set) { 00455 spe_load_float(f, one_reg, 1.0f); 00456 one_reg_set = true; 00457 } 00458 /* tmp = one - fragA */ 00459 spe_fs(f, tmp_reg, one_reg, fragA_reg); 00460 /* termA = fbA * tmp */ 00461 spe_fm(f, term2A_reg, fbA_reg, tmp_reg); 00462 break; 00463 /* XXX more cases */ 00464 default: 00465 ASSERT(0); 00466 } 00467 00468 /* 00469 * Combine Src/Dest RGB terms 00470 */ 00471 switch (blend->rgb_func) { 00472 case PIPE_BLEND_ADD: 00473 spe_fa(f, fragR_reg, term1R_reg, term2R_reg); 00474 spe_fa(f, fragG_reg, term1G_reg, term2G_reg); 00475 spe_fa(f, fragB_reg, term1B_reg, term2B_reg); 00476 break; 00477 case 
PIPE_BLEND_SUBTRACT: 00478 spe_fs(f, fragR_reg, term1R_reg, term2R_reg); 00479 spe_fs(f, fragG_reg, term1G_reg, term2G_reg); 00480 spe_fs(f, fragB_reg, term1B_reg, term2B_reg); 00481 break; 00482 /* XXX more cases */ 00483 default: 00484 ASSERT(0); 00485 } 00486 00487 /* 00488 * Combine Src/Dest A term 00489 */ 00490 switch (blend->alpha_func) { 00491 case PIPE_BLEND_ADD: 00492 spe_fa(f, fragA_reg, term1A_reg, term2A_reg); 00493 break; 00494 case PIPE_BLEND_SUBTRACT: 00495 spe_fs(f, fragA_reg, term1A_reg, term2A_reg); 00496 break; 00497 /* XXX more cases */ 00498 default: 00499 ASSERT(0); 00500 } 00501 00502 spe_release_register(f, term1R_reg); 00503 spe_release_register(f, term1G_reg); 00504 spe_release_register(f, term1B_reg); 00505 spe_release_register(f, term1A_reg); 00506 00507 spe_release_register(f, term2R_reg); 00508 spe_release_register(f, term2G_reg); 00509 spe_release_register(f, term2B_reg); 00510 spe_release_register(f, term2A_reg); 00511 00512 spe_release_register(f, fbR_reg); 00513 spe_release_register(f, fbG_reg); 00514 spe_release_register(f, fbB_reg); 00515 spe_release_register(f, fbA_reg); 00516 00517 spe_release_register(f, one_reg); 00518 spe_release_register(f, tmp_reg); 00519 }
static void gen_colormask | ( | uint | colormask, | |
struct spe_function * | f, | |||
int | fragRGBA_reg, | |||
int | fbRGBA_reg | |||
) | [static] |
static void gen_depth_test | ( | const struct pipe_depth_stencil_alpha_state * | dsa, | |
struct spe_function * | f, | |||
int | mask_reg, | |||
int | ifragZ_reg, | |||
int | ifbZ_reg, | |||
int | zmask_reg | |||
) | [static] |
Generate SPE code to perform Z/depth testing.
dsa | Gallium depth/stencil/alpha state to gen code for | |
f | SPE function to append instruction onto. | |
mask_reg | register containing quad/pixel "alive" mask (in/out) | |
ifragZ_reg | register containing integer fragment Z values (in) | |
ifbZ_reg | register containing integer frame buffer Z values (in/out) | |
zmask_reg | register containing result of Z test/comparison (out) |
Definition at line 59 of file cell_gen_fragment.c.
References ASSERT, pipe_depth_stencil_alpha_state::depth, pipe_depth_state::enabled, pipe_depth_state::func, PIPE_FUNC_ALWAYS, PIPE_FUNC_EQUAL, PIPE_FUNC_GEQUAL, PIPE_FUNC_GREATER, PIPE_FUNC_LEQUAL, PIPE_FUNC_LESS, PIPE_FUNC_NEVER, PIPE_FUNC_NOTEQUAL, spe_and(), spe_andc(), spe_ceq(), spe_cgt(), spe_il(), spe_move(), spe_selb(), and pipe_depth_state::writemask.
00062 { 00063 ASSERT(dsa->depth.enabled); 00064 00065 switch (dsa->depth.func) { 00066 case PIPE_FUNC_EQUAL: 00067 /* zmask = (ifragZ == ref) */ 00068 spe_ceq(f, zmask_reg, ifragZ_reg, ifbZ_reg); 00069 /* mask = (mask & zmask) */ 00070 spe_and(f, mask_reg, mask_reg, zmask_reg); 00071 break; 00072 00073 case PIPE_FUNC_NOTEQUAL: 00074 /* zmask = (ifragZ == ref) */ 00075 spe_ceq(f, zmask_reg, ifragZ_reg, ifbZ_reg); 00076 /* mask = (mask & ~zmask) */ 00077 spe_andc(f, mask_reg, mask_reg, zmask_reg); 00078 break; 00079 00080 case PIPE_FUNC_GREATER: 00081 /* zmask = (ifragZ > ref) */ 00082 spe_cgt(f, zmask_reg, ifragZ_reg, ifbZ_reg); 00083 /* mask = (mask & zmask) */ 00084 spe_and(f, mask_reg, mask_reg, zmask_reg); 00085 break; 00086 00087 case PIPE_FUNC_LESS: 00088 /* zmask = (ref > ifragZ) */ 00089 spe_cgt(f, zmask_reg, ifbZ_reg, ifragZ_reg); 00090 /* mask = (mask & zmask) */ 00091 spe_and(f, mask_reg, mask_reg, zmask_reg); 00092 break; 00093 00094 case PIPE_FUNC_LEQUAL: 00095 /* zmask = (ifragZ > ref) */ 00096 spe_cgt(f, zmask_reg, ifragZ_reg, ifbZ_reg); 00097 /* mask = (mask & ~zmask) */ 00098 spe_andc(f, mask_reg, mask_reg, zmask_reg); 00099 break; 00100 00101 case PIPE_FUNC_GEQUAL: 00102 /* zmask = (ref > ifragZ) */ 00103 spe_cgt(f, zmask_reg, ifbZ_reg, ifragZ_reg); 00104 /* mask = (mask & ~zmask) */ 00105 spe_andc(f, mask_reg, mask_reg, zmask_reg); 00106 break; 00107 00108 case PIPE_FUNC_NEVER: 00109 spe_il(f, mask_reg, 0); /* mask = {0,0,0,0} */ 00110 spe_move(f, zmask_reg, mask_reg); /* zmask = mask */ 00111 break; 00112 00113 case PIPE_FUNC_ALWAYS: 00114 /* mask unchanged */ 00115 spe_il(f, zmask_reg, ~0); /* zmask = {~0,~0,~0,~0} */ 00116 break; 00117 00118 default: 00119 ASSERT(0); 00120 break; 00121 } 00122 00123 if (dsa->depth.writemask) { 00124 /* 00125 * If (ztest passed) { 00126 * framebufferZ = fragmentZ; 00127 * } 00128 * OR, 00129 * framebufferZ = (ztest_passed ? 
fragmentZ : framebufferZ; 00130 */ 00131 spe_selb(f, ifbZ_reg, ifbZ_reg, ifragZ_reg, mask_reg); 00132 } 00133 }
static void gen_logicop | ( | const struct pipe_blend_state * | blend, | |
struct spe_function * | f, | |||
int | fragRGBA_reg, | |||
int | fbRGBA_reg | |||
) | [static] |
static void gen_pack_colors | ( | struct spe_function * | f, | |
enum pipe_format | color_format, | |||
int | r_reg, | |||
int | g_reg, | |||
int | b_reg, | |||
int | a_reg, | |||
int | rgba_reg | |||
) | [static] |
Generate code to pack a quad of float colors into four 32-bit integers.
f | SPE function to append instruction onto. | |
color_format | the dest color packing format | |
r_reg | register containing four red values (in/clobbered) | |
g_reg | register containing four green values (in/clobbered) | |
b_reg | register containing four blue values (in/clobbered) | |
a_reg | register containing four alpha values (in/clobbered) | |
rgba_reg | register to store the packed RGBA colors (out) |
Definition at line 555 of file cell_gen_fragment.c.
References ASSERT, PIPE_FORMAT_A8R8G8B8_UNORM, PIPE_FORMAT_B8G8R8A8_UNORM, spe_cfltu(), spe_or(), spe_roti(), and spe_rotmi().
00559 { 00560 /* Convert float[4] in [0.0,1.0] to int[4] in [0,~0], with clamping */ 00561 spe_cfltu(f, r_reg, r_reg, 32); 00562 spe_cfltu(f, g_reg, g_reg, 32); 00563 spe_cfltu(f, b_reg, b_reg, 32); 00564 spe_cfltu(f, a_reg, a_reg, 32); 00565 00566 /* Shift the most significant bytes to least the significant positions. 00567 * I.e.: reg = reg >> 24 00568 */ 00569 spe_rotmi(f, r_reg, r_reg, -24); 00570 spe_rotmi(f, g_reg, g_reg, -24); 00571 spe_rotmi(f, b_reg, b_reg, -24); 00572 spe_rotmi(f, a_reg, a_reg, -24); 00573 00574 /* Shift the color bytes according to the surface format */ 00575 if (color_format == PIPE_FORMAT_A8R8G8B8_UNORM) { 00576 spe_roti(f, g_reg, g_reg, 8); /* green <<= 8 */ 00577 spe_roti(f, r_reg, r_reg, 16); /* red <<= 16 */ 00578 spe_roti(f, a_reg, a_reg, 24); /* alpha <<= 24 */ 00579 } 00580 else if (color_format == PIPE_FORMAT_B8G8R8A8_UNORM) { 00581 spe_roti(f, r_reg, r_reg, 8); /* red <<= 8 */ 00582 spe_roti(f, g_reg, g_reg, 16); /* green <<= 16 */ 00583 spe_roti(f, b_reg, b_reg, 24); /* blue <<= 24 */ 00584 } 00585 else { 00586 ASSERT(0); 00587 } 00588 00589 /* Merge red, green, blue, alpha registers to make packed RGBA colors. 00590 * Eg: after shifting according to color_format we might have: 00591 * R = {0x00ff0000, 0x00110000, 0x00220000, 0x00330000} 00592 * G = {0x0000ff00, 0x00004400, 0x00005500, 0x00006600} 00593 * B = {0x000000ff, 0x00000077, 0x00000088, 0x00000099} 00594 * A = {0xff000000, 0xaa000000, 0xbb000000, 0xcc000000} 00595 * OR-ing all those together gives us four packed colors: 00596 * RGBA = {0xffffffff, 0xaa114477, 0xbb225588, 0xcc336699} 00597 */ 00598 spe_or(f, rgba_reg, r_reg, g_reg); 00599 spe_or(f, rgba_reg, rgba_reg, b_reg); 00600 spe_or(f, rgba_reg, rgba_reg, a_reg); 00601 }