Go to the source code of this file.
Functions | |
void | cell_gen_fragment_function (struct cell_context *cell, struct spe_function *f) |
Generate SPE code to implement the fragment operations (alpha test, depth test, stencil test, blending, colormask, and final framebuffer write) as specified by the current context state. |
void cell_gen_fragment_function(struct cell_context *cell, struct spe_function *f)
Generate SPE code to implement the fragment operations (alpha test, depth test, stencil test, blending, colormask, and final framebuffer write) as specified by the current context state.
Logically, this code will be called after running the fragment shader. But under some circumstances we could run some of this code before the fragment shader to cull fragments/quads that are totally occluded/discarded.
XXX we only support PIPE_FORMAT_Z24S8_UNORM z/stencil buffer right now.
See the spu_default_fragment_ops() function to see how the per-fragment operations would be done with ordinary C code. The code we generate here, though, has no branches, is SIMD, etc., and should be much faster.
Parameters:
cell - the rendering context (in)
f - the generated function (out)
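Local variables documented in the source (the names are inferred from the listing below, which declares fbRGBA_reg and fbZS_reg in this order):
fbRGBA_reg - framebuffer's RGBA colors for quad
fbZS_reg - framebuffer's combined z/stencil values for quad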
Definition at line 627 of file cell_gen_fragment.c.
References pipe_depth_stencil_alpha_state::alpha, ASSERT, cell_blend_state::base, cell_depth_stencil_alpha_state::base, cell_context::blend, pipe_blend_state::blend_enable, pipe_framebuffer_state::cbufs, pipe_blend_state::colormask, pipe_depth_stencil_alpha_state::depth, cell_context::depth_stencil, pipe_stencil_state::enabled, pipe_depth_state::enabled, pipe_alpha_state::enabled, pipe_surface::format, cell_context::framebuffer, gen_alpha_test(), gen_blend(), gen_colormask(), gen_depth_test(), gen_logicop(), gen_pack_colors(), pipe_blend_state::logicop_enable, PIPE_FORMAT_S8_UNORM, PIPE_FORMAT_S8Z24_UNORM, PIPE_FORMAT_X8Z24_UNORM, PIPE_FORMAT_Z16_UNORM, PIPE_FORMAT_Z24S8_UNORM, PIPE_FORMAT_Z24X8_UNORM, spe_a(), spe_allocate_available_register(), spe_allocate_register(), spe_and(), spe_bi(), spe_cfltu(), spe_fm(), spe_fsmbi(), spe_init_func(), SPE_INST_SIZE, spe_load_float(), spe_lqx(), spe_or(), SPE_REG_RA, spe_release_register(), spe_rotmi(), spe_selb(), spe_shli(), spe_stqx(), SPU_MAX_FRAGMENT_OPS_INSTS, pipe_depth_stencil_alpha_state::stencil, TILE_SIZE, pipe_stencil_state::write_mask, pipe_depth_state::writemask, and pipe_framebuffer_state::zsbuf.
00628 { 00629 const struct pipe_depth_stencil_alpha_state *dsa = 00630 &cell->depth_stencil->base; 00631 const struct pipe_blend_state *blend = &cell->blend->base; 00632 const enum pipe_format color_format = cell->framebuffer.cbufs[0]->format; 00633 00634 /* For SPE function calls: reg $3 = first param, $4 = second param, etc. */ 00635 const int x_reg = 3; /* uint */ 00636 const int y_reg = 4; /* uint */ 00637 const int color_tile_reg = 5; /* tile_t * */ 00638 const int depth_tile_reg = 6; /* tile_t * */ 00639 const int fragZ_reg = 7; /* vector float */ 00640 const int fragR_reg = 8; /* vector float */ 00641 const int fragG_reg = 9; /* vector float */ 00642 const int fragB_reg = 10; /* vector float */ 00643 const int fragA_reg = 11; /* vector float */ 00644 const int mask_reg = 12; /* vector uint */ 00645 00646 /* offset of quad from start of tile 00647 * XXX assuming 4-byte pixels for color AND Z/stencil!!!! 00648 */ 00649 int quad_offset_reg; 00650 00651 int fbRGBA_reg; 00652 int fbZS_reg; 00654 spe_init_func(f, SPU_MAX_FRAGMENT_OPS_INSTS * SPE_INST_SIZE); 00655 spe_allocate_register(f, x_reg); 00656 spe_allocate_register(f, y_reg); 00657 spe_allocate_register(f, color_tile_reg); 00658 spe_allocate_register(f, depth_tile_reg); 00659 spe_allocate_register(f, fragZ_reg); 00660 spe_allocate_register(f, fragR_reg); 00661 spe_allocate_register(f, fragG_reg); 00662 spe_allocate_register(f, fragB_reg); 00663 spe_allocate_register(f, fragA_reg); 00664 spe_allocate_register(f, mask_reg); 00665 00666 quad_offset_reg = spe_allocate_available_register(f); 00667 fbRGBA_reg = spe_allocate_available_register(f); 00668 fbZS_reg = spe_allocate_available_register(f); 00669 00670 /* compute offset of quad from start of tile, in bytes */ 00671 { 00672 int x2_reg = spe_allocate_available_register(f); 00673 int y2_reg = spe_allocate_available_register(f); 00674 00675 ASSERT(TILE_SIZE == 32); 00676 00677 spe_rotmi(f, x2_reg, x_reg, -1); /* x2 = x / 2 */ 00678 spe_rotmi(f, y2_reg, 
y_reg, -1); /* y2 = y / 2 */ 00679 spe_shli(f, y2_reg, y2_reg, 4); /* y2 *= 16 */ 00680 spe_a(f, quad_offset_reg, y2_reg, x2_reg); /* offset = y2 + x2 */ 00681 spe_shli(f, quad_offset_reg, quad_offset_reg, 4); /* offset *= 16 */ 00682 00683 spe_release_register(f, x2_reg); 00684 spe_release_register(f, y2_reg); 00685 } 00686 00687 00688 if (dsa->alpha.enabled) { 00689 gen_alpha_test(dsa, f, mask_reg, fragA_reg); 00690 } 00691 00692 if (dsa->depth.enabled || dsa->stencil[0].enabled) { 00693 const enum pipe_format zs_format = cell->framebuffer.zsbuf->format; 00694 boolean write_depth_stencil; 00695 00696 int fbZ_reg = spe_allocate_available_register(f); /* Z values */ 00697 int fbS_reg = spe_allocate_available_register(f); /* Stencil values */ 00698 00699 /* fetch quad of depth/stencil values from tile at (x,y) */ 00700 /* Load: fbZS_reg = memory[depth_tile_reg + offset_reg] */ 00701 spe_lqx(f, fbZS_reg, depth_tile_reg, quad_offset_reg); 00702 00703 if (dsa->depth.enabled) { 00704 /* Extract Z bits from fbZS_reg into fbZ_reg */ 00705 if (zs_format == PIPE_FORMAT_S8Z24_UNORM || 00706 zs_format == PIPE_FORMAT_X8Z24_UNORM) { 00707 int mask_reg = spe_allocate_available_register(f); 00708 spe_fsmbi(f, mask_reg, 0x7777); /* mask[0,1,2,3] = 0x00ffffff */ 00709 spe_and(f, fbZ_reg, fbZS_reg, mask_reg); /* fbZ = fbZS & mask */ 00710 spe_release_register(f, mask_reg); 00711 /* OK, fbZ_reg has four 24-bit Z values now */ 00712 } 00713 else { 00714 /* XXX handle other z/stencil formats */ 00715 ASSERT(0); 00716 } 00717 00718 /* Convert fragZ values from float[4] to uint[4] */ 00719 if (zs_format == PIPE_FORMAT_S8Z24_UNORM || 00720 zs_format == PIPE_FORMAT_X8Z24_UNORM || 00721 zs_format == PIPE_FORMAT_Z24S8_UNORM || 00722 zs_format == PIPE_FORMAT_Z24X8_UNORM) { 00723 /* 24-bit Z values */ 00724 int scale_reg = spe_allocate_available_register(f); 00725 00726 /* scale_reg[0,1,2,3] = float(2^24-1) */ 00727 spe_load_float(f, scale_reg, (float) 0xffffff); 00728 00729 /* XXX these two 
instructions might be combined */ 00730 spe_fm(f, fragZ_reg, fragZ_reg, scale_reg); /* fragZ *= scale */ 00731 spe_cfltu(f, fragZ_reg, fragZ_reg, 0); /* fragZ = (int) fragZ */ 00732 00733 spe_release_register(f, scale_reg); 00734 } 00735 else { 00736 /* XXX handle 16-bit Z format */ 00737 ASSERT(0); 00738 } 00739 } 00740 00741 if (dsa->stencil[0].enabled) { 00742 /* Extract Stencil bit sfrom fbZS_reg into fbS_reg */ 00743 if (zs_format == PIPE_FORMAT_S8Z24_UNORM || 00744 zs_format == PIPE_FORMAT_X8Z24_UNORM) { 00745 /* XXX extract with a shift */ 00746 ASSERT(0); 00747 } 00748 else if (zs_format == PIPE_FORMAT_Z24S8_UNORM || 00749 zs_format == PIPE_FORMAT_Z24X8_UNORM) { 00750 /* XXX extract with a mask */ 00751 ASSERT(0); 00752 } 00753 } 00754 00755 00756 if (dsa->stencil[0].enabled) { 00757 /* XXX this may involve depth testing too */ 00758 // gen_stencil_test(dsa, f, ... ); 00759 ASSERT(0); 00760 } 00761 else if (dsa->depth.enabled) { 00762 int zmask_reg = spe_allocate_available_register(f); 00763 gen_depth_test(dsa, f, mask_reg, fragZ_reg, fbZ_reg, zmask_reg); 00764 spe_release_register(f, zmask_reg); 00765 } 00766 00767 /* do we need to write Z and/or Stencil back into framebuffer? */ 00768 write_depth_stencil = (dsa->depth.writemask | 00769 dsa->stencil[0].write_mask | 00770 dsa->stencil[1].write_mask); 00771 00772 if (write_depth_stencil) { 00773 /* Merge latest Z and Stencil values into fbZS_reg. 00774 * fbZ_reg has four Z vals in bits [23..0] or bits [15..0]. 00775 * fbS_reg has four 8-bit Z values in bits [7..0]. 
00776 */ 00777 if (zs_format == PIPE_FORMAT_S8Z24_UNORM || 00778 zs_format == PIPE_FORMAT_X8Z24_UNORM) { 00779 spe_shli(f, fbS_reg, fbS_reg, 24); /* fbS = fbS << 24 */ 00780 spe_or(f, fbZS_reg, fbS_reg, fbZ_reg); /* fbZS = fbS | fbZ */ 00781 } 00782 else if (zs_format == PIPE_FORMAT_S8Z24_UNORM || 00783 zs_format == PIPE_FORMAT_X8Z24_UNORM) { 00784 /* XXX to do */ 00785 ASSERT(0); 00786 } 00787 else if (zs_format == PIPE_FORMAT_Z16_UNORM) { 00788 /* XXX to do */ 00789 ASSERT(0); 00790 } 00791 else if (zs_format == PIPE_FORMAT_S8_UNORM) { 00792 /* XXX to do */ 00793 ASSERT(0); 00794 } 00795 else { 00796 /* bad zs_format */ 00797 ASSERT(0); 00798 } 00799 00800 /* Store: memory[depth_tile_reg + quad_offset_reg] = fbZS */ 00801 spe_stqx(f, fbZS_reg, depth_tile_reg, quad_offset_reg); 00802 } 00803 00804 spe_release_register(f, fbZ_reg); 00805 spe_release_register(f, fbS_reg); 00806 } 00807 00808 00809 /* Get framebuffer quad/colors. We'll need these for blending, 00810 * color masking, and to obey the quad/pixel mask. 00811 * Load: fbRGBA_reg = memory[color_tile + quad_offset] 00812 * Note: if mask={~0,~0,~0,~0} and we're not blending or colormasking 00813 * we could skip this load. 00814 */ 00815 spe_lqx(f, fbRGBA_reg, color_tile_reg, quad_offset_reg); 00816 00817 00818 if (blend->blend_enable) { 00819 gen_blend(blend, f, color_format, 00820 fragR_reg, fragG_reg, fragB_reg, fragA_reg, fbRGBA_reg); 00821 } 00822 00823 /* 00824 * Write fragment colors to framebuffer/tile. 00825 * This involves converting the fragment colors from float[4] to the 00826 * tile's specific format and obeying the quad/pixel mask. 
00827 */ 00828 { 00829 int rgba_reg = spe_allocate_available_register(f); 00830 00831 /* Pack four float colors as four 32-bit int colors */ 00832 gen_pack_colors(f, color_format, 00833 fragR_reg, fragG_reg, fragB_reg, fragA_reg, 00834 rgba_reg); 00835 00836 if (blend->logicop_enable) { 00837 gen_logicop(blend, f, rgba_reg, fbRGBA_reg); 00838 } 00839 00840 if (blend->colormask != 0xf) { 00841 gen_colormask(blend->colormask, f, rgba_reg, fbRGBA_reg); 00842 } 00843 00844 00845 /* Mix fragment colors with framebuffer colors using the quad/pixel mask: 00846 * if (mask[i]) 00847 * rgba[i] = rgba[i]; 00848 * else 00849 * rgba[i] = framebuffer[i]; 00850 */ 00851 spe_selb(f, rgba_reg, fbRGBA_reg, rgba_reg, mask_reg); 00852 00853 /* Store updated quad in tile: 00854 * memory[color_tile + quad_offset] = rgba_reg; 00855 */ 00856 spe_stqx(f, rgba_reg, color_tile_reg, quad_offset_reg); 00857 00858 spe_release_register(f, rgba_reg); 00859 } 00860 00861 //printf("gen_fragment_ops nr instructions: %u\n", f->num_inst); 00862 00863 spe_bi(f, SPE_REG_RA, 0, 0); /* return from function call */ 00864 00865 00866 spe_release_register(f, fbRGBA_reg); 00867 spe_release_register(f, fbZS_reg); 00868 spe_release_register(f, quad_offset_reg); 00869 }