Go to the source code of this file.
Defines | |
#define | LINEAR_QUAD_LAYOUT 1 |
Functions | |
void | spu_fallback_fragment_ops (uint x, uint y, tile_t *colorTile, tile_t *depthStencilTile, vector float fragZ, vector float fragR, vector float fragG, vector float fragB, vector float fragA, vector unsigned int mask) |
Called by rasterizer for each quad after the shader has run. |
#define LINEAR_QUAD_LAYOUT 1 |
void spu_fallback_fragment_ops | ( | uint | x, | |
uint | y, | |||
tile_t * | colorTile, | |||
tile_t * | depthStencilTile, | |||
vector float | fragZ, | |||
vector float | fragR, | |||
vector float | fragG, | |||
vector float | fragB, | |||
vector float | fragA, | |||
vector unsigned int | mask | |||
) |
Called by rasterizer for each quad after the shader has run.
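Performs all of the per-fragment operations: alpha test, Z test, stencil test, blending, color masking and logic ops. This is a fallback/debug function: in practice a generated function produced by the PPU is used instead, but this version remains useful for debugging and validation. Note that in the listing below the stencil test, color masking and logic ops are still unimplemented placeholders (the stencil branch only asserts the depth format, and the other two are marked "XXX to do").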
Note on TILE_STATUS_DIRTY (assigned to both spu.cur_ztile_status and spu.cur_ctile_status in this function): the tile has been modified locally, but not put back yet.
Definition at line 52 of file spu_per_fragment_op.c.
References pipe_depth_stencil_alpha_state::alpha, pipe_blend_state::alpha_dst_factor, pipe_blend_state::alpha_func, pipe_blend_state::alpha_src_factor, ASSERT, spu_global::blend, pipe_blend_state::blend_enable, spu_framebuffer::color_format, pipe_blend_state::colormask, spu_global::cur_ctile_status, spu_global::cur_ztile_status, pipe_depth_stencil_alpha_state::depth, spu_framebuffer::depth_format, spu_global::depth_stencil_alpha, pipe_stencil_state::enabled, pipe_depth_state::enabled, pipe_alpha_state::enabled, spu_global::fb, pipe_depth_state::func, pipe_alpha_state::func, pipe_blend_state::logicop_enable, PIPE_BLEND_ADD, PIPE_BLEND_SUBTRACT, PIPE_BLENDFACTOR_INV_SRC_ALPHA, PIPE_BLENDFACTOR_ONE, PIPE_BLENDFACTOR_SRC_ALPHA, PIPE_BLENDFACTOR_SRC_COLOR, PIPE_BLENDFACTOR_ZERO, PIPE_FORMAT_A8R8G8B8_UNORM, PIPE_FORMAT_B8G8R8A8_UNORM, PIPE_FORMAT_S8Z24_UNORM, PIPE_FORMAT_X8Z24_UNORM, PIPE_FUNC_ALWAYS, PIPE_FUNC_EQUAL, PIPE_FUNC_GEQUAL, PIPE_FUNC_GREATER, PIPE_FUNC_LEQUAL, PIPE_FUNC_LESS, PIPE_FUNC_NEVER, PIPE_FUNC_NOTEQUAL, pipe_alpha_state::ref, pipe_blend_state::rgb_dst_factor, pipe_blend_state::rgb_func, pipe_blend_state::rgb_src_factor, spu, spu_pack_A8R8G8B8(), spu_pack_B8G8R8A8(), spu_unpack_A8R8G8B8(), spu_unpack_B8G8R8A8(), pipe_depth_stencil_alpha_state::stencil, TILE_STATUS_DIRTY, tile_t::ui, and tile_t::ui4.
00061 { 00062 vector float frag_aos[4]; 00063 unsigned int c0, c1, c2, c3; 00064 00065 /* do alpha test */ 00066 if (spu.depth_stencil_alpha.alpha.enabled) { 00067 vector float ref = spu_splats(spu.depth_stencil_alpha.alpha.ref); 00068 vector unsigned int amask; 00069 00070 switch (spu.depth_stencil_alpha.alpha.func) { 00071 case PIPE_FUNC_LESS: 00072 amask = spu_cmpgt(ref, fragA); /* mask = (fragA < ref) */ 00073 break; 00074 case PIPE_FUNC_GREATER: 00075 amask = spu_cmpgt(fragA, ref); /* mask = (fragA > ref) */ 00076 break; 00077 case PIPE_FUNC_GEQUAL: 00078 amask = spu_cmpgt(ref, fragA); 00079 amask = spu_nor(amask, amask); 00080 break; 00081 case PIPE_FUNC_LEQUAL: 00082 amask = spu_cmpgt(fragA, ref); 00083 amask = spu_nor(amask, amask); 00084 break; 00085 case PIPE_FUNC_EQUAL: 00086 amask = spu_cmpeq(ref, fragA); 00087 break; 00088 case PIPE_FUNC_NOTEQUAL: 00089 amask = spu_cmpeq(ref, fragA); 00090 amask = spu_nor(amask, amask); 00091 break; 00092 case PIPE_FUNC_ALWAYS: 00093 amask = spu_splats(0xffffffffU); 00094 break; 00095 case PIPE_FUNC_NEVER: 00096 amask = spu_splats( 0x0U); 00097 break; 00098 default: 00099 ; 00100 } 00101 00102 mask = spu_and(mask, amask); 00103 } 00104 00105 /* Z and/or stencil testing... 
*/ 00106 if (spu.depth_stencil_alpha.depth.enabled || 00107 spu.depth_stencil_alpha.stencil[0].enabled) { 00108 00109 /* get four Z/Stencil values from tile */ 00110 vector unsigned int mask24 = spu_splats((unsigned int)0x00ffffffU); 00111 vector unsigned int ifbZS = depthStencilTile->ui4[y/2][x/2]; 00112 vector unsigned int ifbZ = spu_and(ifbZS, mask24); 00113 vector unsigned int ifbS = spu_andc(ifbZS, mask24); 00114 00115 if (spu.depth_stencil_alpha.stencil[0].enabled) { 00116 /* do stencil test */ 00117 ASSERT(spu.fb.depth_format == PIPE_FORMAT_S8Z24_UNORM); 00118 00119 } 00120 else if (spu.depth_stencil_alpha.depth.enabled) { 00121 /* do depth test */ 00122 00123 ASSERT(spu.fb.depth_format == PIPE_FORMAT_S8Z24_UNORM || 00124 spu.fb.depth_format == PIPE_FORMAT_X8Z24_UNORM); 00125 00126 vector unsigned int ifragZ; 00127 vector unsigned int zmask; 00128 00129 /* convert four fragZ from float to uint */ 00130 fragZ = spu_mul(fragZ, spu_splats((float) 0xffffff)); 00131 ifragZ = spu_convtu(fragZ, 0); 00132 00133 /* do depth comparison, setting zmask with results */ 00134 switch (spu.depth_stencil_alpha.depth.func) { 00135 case PIPE_FUNC_LESS: 00136 zmask = spu_cmpgt(ifbZ, ifragZ); /* mask = (ifragZ < ifbZ) */ 00137 break; 00138 case PIPE_FUNC_GREATER: 00139 zmask = spu_cmpgt(ifragZ, ifbZ); /* mask = (ifbZ > ifragZ) */ 00140 break; 00141 case PIPE_FUNC_GEQUAL: 00142 zmask = spu_cmpgt(ifbZ, ifragZ); 00143 zmask = spu_nor(zmask, zmask); 00144 break; 00145 case PIPE_FUNC_LEQUAL: 00146 zmask = spu_cmpgt(ifragZ, ifbZ); 00147 zmask = spu_nor(zmask, zmask); 00148 break; 00149 case PIPE_FUNC_EQUAL: 00150 zmask = spu_cmpeq(ifbZ, ifragZ); 00151 break; 00152 case PIPE_FUNC_NOTEQUAL: 00153 zmask = spu_cmpeq(ifbZ, ifragZ); 00154 zmask = spu_nor(zmask, zmask); 00155 break; 00156 case PIPE_FUNC_ALWAYS: 00157 zmask = spu_splats(0xffffffffU); 00158 break; 00159 case PIPE_FUNC_NEVER: 00160 zmask = spu_splats( 0x0U); 00161 break; 00162 default: 00163 ; 00164 } 00165 00166 mask = 
spu_and(mask, zmask); 00167 00168 /* merge framebuffer Z and fragment Z according to the mask */ 00169 ifbZ = spu_or(spu_and(ifragZ, mask), 00170 spu_andc(ifbZ, mask)); 00171 } 00172 00173 if (spu_extract(spu_orx(mask), 0)) { 00174 /* put new fragment Z/Stencil values back into Z/Stencil tile */ 00175 depthStencilTile->ui4[y/2][x/2] = spu_or(ifbZ, ifbS); 00176 00177 spu.cur_ztile_status = TILE_STATUS_DIRTY; 00178 } 00179 } 00180 00181 if (spu.blend.blend_enable) { 00182 /* blending terms, misc regs */ 00183 vector float term1r, term1g, term1b, term1a; 00184 vector float term2r, term2g, term2b, term2a; 00185 vector float one, tmp; 00186 00187 vector float fbRGBA[4]; /* current framebuffer colors */ 00188 00189 /* get colors from framebuffer/tile */ 00190 { 00191 vector float fc[4]; 00192 uint c0, c1, c2, c3; 00193 00194 #if LINEAR_QUAD_LAYOUT /* See comments/diagram below */ 00195 c0 = colorTile->ui[y][x*2+0]; 00196 c1 = colorTile->ui[y][x*2+1]; 00197 c2 = colorTile->ui[y][x*2+2]; 00198 c3 = colorTile->ui[y][x*2+3]; 00199 #else 00200 c0 = colorTile->ui[y+0][x+0]; 00201 c1 = colorTile->ui[y+0][x+1]; 00202 c2 = colorTile->ui[y+1][x+0]; 00203 c3 = colorTile->ui[y+1][x+1]; 00204 #endif 00205 switch (spu.fb.color_format) { 00206 case PIPE_FORMAT_B8G8R8A8_UNORM: 00207 fc[0] = spu_unpack_B8G8R8A8(c0); 00208 fc[1] = spu_unpack_B8G8R8A8(c1); 00209 fc[2] = spu_unpack_B8G8R8A8(c2); 00210 fc[3] = spu_unpack_B8G8R8A8(c3); 00211 break; 00212 case PIPE_FORMAT_A8R8G8B8_UNORM: 00213 fc[0] = spu_unpack_A8R8G8B8(c0); 00214 fc[1] = spu_unpack_A8R8G8B8(c1); 00215 fc[2] = spu_unpack_A8R8G8B8(c2); 00216 fc[3] = spu_unpack_A8R8G8B8(c3); 00217 break; 00218 default: 00219 ASSERT(0); 00220 } 00221 _transpose_matrix4x4(fbRGBA, fc); 00222 } 00223 00224 /* 00225 * Compute Src RGB terms 00226 */ 00227 switch (spu.blend.rgb_src_factor) { 00228 case PIPE_BLENDFACTOR_ONE: 00229 term1r = fragR; 00230 term1g = fragG; 00231 term1b = fragB; 00232 break; 00233 case PIPE_BLENDFACTOR_ZERO: 00234 term1r = 
00235 term1g = 00236 term1b = spu_splats(0.0f); 00237 break; 00238 case PIPE_BLENDFACTOR_SRC_COLOR: 00239 term1r = spu_mul(fragR, fragR); 00240 term1g = spu_mul(fragG, fragG); 00241 term1b = spu_mul(fragB, fragB); 00242 break; 00243 case PIPE_BLENDFACTOR_SRC_ALPHA: 00244 term1r = spu_mul(fragR, fragA); 00245 term1g = spu_mul(fragG, fragA); 00246 term1b = spu_mul(fragB, fragA); 00247 break; 00248 /* XXX more cases */ 00249 default: 00250 ASSERT(0); 00251 } 00252 00253 /* 00254 * Compute Src Alpha term 00255 */ 00256 switch (spu.blend.alpha_src_factor) { 00257 case PIPE_BLENDFACTOR_ONE: 00258 term1a = fragA; 00259 break; 00260 case PIPE_BLENDFACTOR_SRC_COLOR: 00261 term1a = spu_splats(0.0f); 00262 break; 00263 case PIPE_BLENDFACTOR_SRC_ALPHA: 00264 term1a = spu_mul(fragA, fragA); 00265 break; 00266 /* XXX more cases */ 00267 default: 00268 ASSERT(0); 00269 } 00270 00271 /* 00272 * Compute Dest RGB terms 00273 */ 00274 switch (spu.blend.rgb_dst_factor) { 00275 case PIPE_BLENDFACTOR_ONE: 00276 term2r = fragR; 00277 term2g = fragG; 00278 term2b = fragB; 00279 break; 00280 case PIPE_BLENDFACTOR_ZERO: 00281 term2r = 00282 term2g = 00283 term2b = spu_splats(0.0f); 00284 break; 00285 case PIPE_BLENDFACTOR_SRC_COLOR: 00286 term2r = spu_mul(fbRGBA[0], fragR); 00287 term2g = spu_mul(fbRGBA[1], fragG); 00288 term2b = spu_mul(fbRGBA[2], fragB); 00289 break; 00290 case PIPE_BLENDFACTOR_SRC_ALPHA: 00291 term2r = spu_mul(fbRGBA[0], fragA); 00292 term2g = spu_mul(fbRGBA[1], fragA); 00293 term2b = spu_mul(fbRGBA[2], fragA); 00294 break; 00295 case PIPE_BLENDFACTOR_INV_SRC_ALPHA: 00296 one = spu_splats(1.0f); 00297 tmp = spu_sub(one, fragA); 00298 term2r = spu_mul(fbRGBA[0], tmp); 00299 term2g = spu_mul(fbRGBA[1], tmp); 00300 term2b = spu_mul(fbRGBA[2], tmp); 00301 break; 00302 /* XXX more cases */ 00303 default: 00304 ASSERT(0); 00305 } 00306 00307 /* 00308 * Compute Dest Alpha term 00309 */ 00310 switch (spu.blend.alpha_dst_factor) { 00311 case PIPE_BLENDFACTOR_ONE: 00312 term2a = 
fragA; 00313 break; 00314 case PIPE_BLENDFACTOR_SRC_COLOR: 00315 term2a = spu_splats(0.0f); 00316 break; 00317 case PIPE_BLENDFACTOR_SRC_ALPHA: 00318 term2a = spu_mul(fbRGBA[3], fragA); 00319 break; 00320 case PIPE_BLENDFACTOR_INV_SRC_ALPHA: 00321 one = spu_splats(1.0f); 00322 tmp = spu_sub(one, fragA); 00323 term2a = spu_mul(fbRGBA[3], tmp); 00324 break; 00325 /* XXX more cases */ 00326 default: 00327 ASSERT(0); 00328 } 00329 00330 /* 00331 * Combine Src/Dest RGB terms 00332 */ 00333 switch (spu.blend.rgb_func) { 00334 case PIPE_BLEND_ADD: 00335 fragR = spu_add(term1r, term2r); 00336 fragG = spu_add(term1g, term2g); 00337 fragB = spu_add(term1b, term2b); 00338 break; 00339 case PIPE_BLEND_SUBTRACT: 00340 fragR = spu_sub(term1r, term2r); 00341 fragG = spu_sub(term1g, term2g); 00342 fragB = spu_sub(term1b, term2b); 00343 break; 00344 /* XXX more cases */ 00345 default: 00346 ASSERT(0); 00347 } 00348 00349 /* 00350 * Combine Src/Dest A term 00351 */ 00352 switch (spu.blend.alpha_func) { 00353 case PIPE_BLEND_ADD: 00354 fragA = spu_add(term1a, term2a); 00355 break; 00356 case PIPE_BLEND_SUBTRACT: 00357 fragA = spu_sub(term1a, term2a); 00358 break; 00359 /* XXX more cases */ 00360 default: 00361 ASSERT(0); 00362 } 00363 } 00364 00365 00366 /* 00367 * Convert RRRR,GGGG,BBBB,AAAA to RGBA,RGBA,RGBA,RGBA. 00368 */ 00369 #if 0 00370 /* original code */ 00371 { 00372 vector float frag_soa[4]; 00373 frag_soa[0] = fragR; 00374 frag_soa[1] = fragG; 00375 frag_soa[2] = fragB; 00376 frag_soa[3] = fragA; 00377 _transpose_matrix4x4(frag_aos, frag_soa); 00378 } 00379 #else 00380 /* short-cut relying on function parameter layout: */ 00381 _transpose_matrix4x4(frag_aos, &fragR); 00382 (void) fragG; 00383 (void) fragB; 00384 #endif 00385 00386 /* 00387 * Pack float colors into 32-bit RGBA words. 
00388 */ 00389 switch (spu.fb.color_format) { 00390 case PIPE_FORMAT_A8R8G8B8_UNORM: 00391 c0 = spu_pack_A8R8G8B8(frag_aos[0]); 00392 c1 = spu_pack_A8R8G8B8(frag_aos[1]); 00393 c2 = spu_pack_A8R8G8B8(frag_aos[2]); 00394 c3 = spu_pack_A8R8G8B8(frag_aos[3]); 00395 break; 00396 00397 case PIPE_FORMAT_B8G8R8A8_UNORM: 00398 c0 = spu_pack_B8G8R8A8(frag_aos[0]); 00399 c1 = spu_pack_B8G8R8A8(frag_aos[1]); 00400 c2 = spu_pack_B8G8R8A8(frag_aos[2]); 00401 c3 = spu_pack_B8G8R8A8(frag_aos[3]); 00402 break; 00403 default: 00404 fprintf(stderr, "SPU: Bad pixel format in spu_default_fragment_ops\n"); 00405 ASSERT(0); 00406 } 00407 00408 00409 /* 00410 * Color masking 00411 */ 00412 if (spu.blend.colormask != 0xf) { 00413 /* XXX to do */ 00414 /* apply color mask to 32-bit packed colors */ 00415 } 00416 00417 00418 /* 00419 * Logic Ops 00420 */ 00421 if (spu.blend.logicop_enable) { 00422 /* XXX to do */ 00423 /* apply logicop to 32-bit packed colors */ 00424 } 00425 00426 00427 /* 00428 * If mask is non-zero, mark tile as dirty. 00429 */ 00430 if (spu_extract(spu_orx(mask), 0)) { 00431 spu.cur_ctile_status = TILE_STATUS_DIRTY; 00432 } 00433 else { 00434 return; 00435 } 00436 00437 00438 /* 00439 * Write new quad colors to the framebuffer/tile. 00440 * Only write pixels where the corresponding mask word is set. 
00441 */ 00442 #if LINEAR_QUAD_LAYOUT 00443 /* 00444 * Quad layout: 00445 * +--+--+--+--+ 00446 * |p0|p1|p2|p3| 00447 * +--+--+--+--+ 00448 */ 00449 if (spu_extract(mask, 0)) 00450 colorTile->ui[y][x*2] = c0; 00451 if (spu_extract(mask, 1)) 00452 colorTile->ui[y][x*2+1] = c1; 00453 if (spu_extract(mask, 2)) 00454 colorTile->ui[y][x*2+2] = c2; 00455 if (spu_extract(mask, 3)) 00456 colorTile->ui[y][x*2+3] = c3; 00457 #else 00458 /* 00459 * Quad layout: 00460 * +--+--+ 00461 * |p0|p1| 00462 * +--+--+ 00463 * |p2|p3| 00464 * +--+--+ 00465 */ 00466 if (spu_extract(mask, 0)) 00467 colorTile->ui[y+0][x+0] = c0; 00468 if (spu_extract(mask, 1)) 00469 colorTile->ui[y+0][x+1] = c1; 00470 if (spu_extract(mask, 2)) 00471 colorTile->ui[y+1][x+0] = c2; 00472 if (spu_extract(mask, 3)) 00473 colorTile->ui[y+1][x+1] = c3;